LLVM 20.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
109 "disable-p10-store-forward",
110 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
111 cl::init(false));
112
113static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
114cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
117cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
120cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
121
122static cl::opt<bool> DisableSCO("disable-ppc-sco",
123cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
124
125static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
126cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
127
128static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
129cl::desc("use absolute jump tables on ppc"), cl::Hidden);
130
131static cl::opt<bool>
132 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
133 cl::desc("disable vector permute decomposition"),
134 cl::init(true), cl::Hidden);
135
137 "disable-auto-paired-vec-st",
138 cl::desc("disable automatically generated 32byte paired vector stores"),
139 cl::init(true), cl::Hidden);
140
142 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
143 cl::desc("Set minimum number of entries to use a jump table on PPC"));
144
146 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
147 cl::desc("max depth when checking alias info in GatherAllAliases()"));
148
150 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
151 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
152 "function to use initial-exec"));
153
// Pass-wide statistics counters (reported with -stats).
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
159
// Forward declaration; presumably tests whether a shuffle mask operates on
// N-byte-wide elements — definition appears later in this file (not visible
// in this chunk).
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

// Forward declaration; presumably widens the given vector value to a wider
// legal vector type — definition appears later in this file (not visible in
// this chunk).
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// Symbol name of the global holding the stack-protector canary word
// (AIX-specific, per the identifier prefix).
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
176 const PPCSubtarget &STI)
177 : TargetLowering(TM), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186
187 // Set up the register classes.
188 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
189 if (!useSoftFloat()) {
190 if (hasSPE()) {
191 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
192 // EFPU2 APU only supports f32
193 if (!Subtarget.hasEFPU2())
194 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
195 } else {
196 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
197 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
198 }
199 }
200
201 // Match BITREVERSE to customized fast code sequence in the td file.
204
205 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
206 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
207
208 // Custom lower inline assembly to check for special registers.
209 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
210 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
211
212 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
213 for (MVT VT : MVT::integer_valuetypes()) {
216 }
217
218 if (Subtarget.isISA3_0()) {
219 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
220 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
221 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
222 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
223 } else {
224 // No extending loads from f16 or HW conversions back and forth.
225 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
226 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
227 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
228 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
229 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
230 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
231 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
232 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
233 }
234
235 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
236
237 // PowerPC has pre-inc load and store's.
248 if (!Subtarget.hasSPE()) {
253 }
254
255 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
256 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
257 for (MVT VT : ScalarIntVTs) {
262 }
263
264 if (Subtarget.useCRBits()) {
266
267 if (isPPC64 || Subtarget.hasFPCVT()) {
270 isPPC64 ? MVT::i64 : MVT::i32);
273 isPPC64 ? MVT::i64 : MVT::i32);
274
277 isPPC64 ? MVT::i64 : MVT::i32);
280 isPPC64 ? MVT::i64 : MVT::i32);
281
284 isPPC64 ? MVT::i64 : MVT::i32);
287 isPPC64 ? MVT::i64 : MVT::i32);
288
291 isPPC64 ? MVT::i64 : MVT::i32);
294 isPPC64 ? MVT::i64 : MVT::i32);
295 } else {
300 }
301
302 // PowerPC does not support direct load/store of condition registers.
303 setOperationAction(ISD::LOAD, MVT::i1, Custom);
304 setOperationAction(ISD::STORE, MVT::i1, Custom);
305
306 // FIXME: Remove this once the ANDI glue bug is fixed:
307 if (ANDIGlueBug)
309
310 for (MVT VT : MVT::integer_valuetypes()) {
313 setTruncStoreAction(VT, MVT::i1, Expand);
314 }
315
316 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317 }
318
319 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320 // PPC (the libcall is not available).
325
326 // We do not currently implement these libm ops for PowerPC.
327 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
331 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333
334 // PowerPC has no SREM/UREM instructions unless we are on P9
335 // On P9 we may use a hardware instruction to compute the remainder.
336 // When the result of both the remainder and the division is required it is
337 // more efficient to compute the remainder from the result of the division
338 // rather than use the remainder instruction. The instructions are legalized
339 // directly because the DivRemPairsPass performs the transformation at the IR
340 // level.
341 if (Subtarget.isISA3_0()) {
346 } else {
351 }
352
353 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362
363 // Handle constrained floating-point operations of scalar.
364 // TODO: Handle SPE specific operation.
370
375
376 if (!Subtarget.hasSPE()) {
379 }
380
381 if (Subtarget.hasVSX()) {
384 }
385
386 if (Subtarget.hasFSQRT()) {
389 }
390
391 if (Subtarget.hasFPRND()) {
396
401 }
402
403 // We don't support sin/cos/sqrt/fmod/pow
404 setOperationAction(ISD::FSIN , MVT::f64, Expand);
405 setOperationAction(ISD::FCOS , MVT::f64, Expand);
406 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
408 setOperationAction(ISD::FPOW , MVT::f64, Expand);
409 setOperationAction(ISD::FSIN , MVT::f32, Expand);
410 setOperationAction(ISD::FCOS , MVT::f32, Expand);
411 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
413 setOperationAction(ISD::FPOW , MVT::f32, Expand);
414
415 // MASS transformation for LLVM intrinsics with replicating fast-math flag
416 // to be consistent to PPCGenScalarMASSEntries pass
417 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
418 setOperationAction(ISD::FSIN , MVT::f64, Custom);
419 setOperationAction(ISD::FCOS , MVT::f64, Custom);
420 setOperationAction(ISD::FPOW , MVT::f64, Custom);
421 setOperationAction(ISD::FLOG, MVT::f64, Custom);
422 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
423 setOperationAction(ISD::FEXP, MVT::f64, Custom);
424 setOperationAction(ISD::FSIN , MVT::f32, Custom);
425 setOperationAction(ISD::FCOS , MVT::f32, Custom);
426 setOperationAction(ISD::FPOW , MVT::f32, Custom);
427 setOperationAction(ISD::FLOG, MVT::f32, Custom);
428 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
429 setOperationAction(ISD::FEXP, MVT::f32, Custom);
430 }
431
432 if (Subtarget.hasSPE()) {
435 } else {
436 setOperationAction(ISD::FMA , MVT::f64, Legal);
437 setOperationAction(ISD::FMA , MVT::f32, Legal);
439 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
440 }
441
442 if (Subtarget.hasSPE())
443 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
444
445 // If we're enabling GP optimizations, use hardware square root
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448 Subtarget.hasFRE()))
449 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
450
451 if (!Subtarget.hasFSQRT() &&
452 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453 Subtarget.hasFRES()))
454 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
455
456 if (Subtarget.hasFCPSGN()) {
459 } else {
462 }
463
464 if (Subtarget.hasFPRND()) {
465 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
466 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
467 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
468 setOperationAction(ISD::FROUND, MVT::f64, Legal);
469
470 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
471 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
472 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
473 setOperationAction(ISD::FROUND, MVT::f32, Legal);
474 }
475
476 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
477 // instruction xxbrd to speed up scalar BSWAP64.
478 if (Subtarget.isISA3_1()) {
481 } else {
484 ISD::BSWAP, MVT::i64,
485 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
486 }
487
488 // CTPOP or CTTZ were introduced in P8/P9 respectively
489 if (Subtarget.isISA3_0()) {
490 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
491 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
492 } else {
493 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
494 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
495 }
496
497 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
500 } else {
503 }
504
505 // PowerPC does not have ROTR
508
509 if (!Subtarget.useCRBits()) {
510 // PowerPC does not have Select
515 }
516
517 // PowerPC wants to turn select_cc of FP into fsel when possible.
520
521 // PowerPC wants to optimize integer setcc a bit
522 if (!Subtarget.useCRBits())
524
525 if (Subtarget.hasFPU()) {
529
533 }
534
535 // PowerPC does not have BRCOND which requires SetCC
536 if (!Subtarget.useCRBits())
537 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
538
539 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
540
541 if (Subtarget.hasSPE()) {
542 // SPE has built-in conversions
549
550 // SPE supports signaling compare of f32/f64.
553 } else {
554 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
557
558 // PowerPC does not have [U|S]INT_TO_FP
563 }
564
565 if (Subtarget.hasDirectMove() && isPPC64) {
566 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
567 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
568 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
569 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
570 if (TM.Options.UnsafeFPMath) {
571 setOperationAction(ISD::LRINT, MVT::f64, Legal);
572 setOperationAction(ISD::LRINT, MVT::f32, Legal);
573 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
574 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
575 setOperationAction(ISD::LROUND, MVT::f64, Legal);
576 setOperationAction(ISD::LROUND, MVT::f32, Legal);
577 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
578 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
579 }
580 } else {
581 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
582 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
583 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
584 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
585 }
586
587 // We cannot sextinreg(i1). Expand to shifts.
589
590 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
591 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
592 // support continuation, user-level threading, and etc.. As a result, no
593 // other SjLj exception interfaces are implemented and please don't build
594 // your own exception handling based on them.
595 // LLVM/Clang supports zero-cost DWARF exception handling.
598
599 // We want to legalize GlobalAddress and ConstantPool nodes into the
600 // appropriate instructions to materialize the address.
611
612 // TRAP is legal.
613 setOperationAction(ISD::TRAP, MVT::Other, Legal);
614
615 // TRAMPOLINE is custom lowered.
616 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
617 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
618
619 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
620 setOperationAction(ISD::VASTART , MVT::Other, Custom);
621
622 if (Subtarget.is64BitELFABI()) {
623 // VAARG always uses double-word chunks, so promote anything smaller.
624 setOperationAction(ISD::VAARG, MVT::i1, Promote);
625 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
626 setOperationAction(ISD::VAARG, MVT::i8, Promote);
627 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
628 setOperationAction(ISD::VAARG, MVT::i16, Promote);
629 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
630 setOperationAction(ISD::VAARG, MVT::i32, Promote);
631 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
632 setOperationAction(ISD::VAARG, MVT::Other, Expand);
633 } else if (Subtarget.is32BitELFABI()) {
634 // VAARG is custom lowered with the 32-bit SVR4 ABI.
635 setOperationAction(ISD::VAARG, MVT::Other, Custom);
636 setOperationAction(ISD::VAARG, MVT::i64, Custom);
637 } else
638 setOperationAction(ISD::VAARG, MVT::Other, Expand);
639
640 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
641 if (Subtarget.is32BitELFABI())
642 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
643 else
644 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
645
646 // Use the default implementation.
647 setOperationAction(ISD::VAEND , MVT::Other, Expand);
648 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
649 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
650 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
651 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
652 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
653 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
656
657 // We want to custom lower some of our intrinsics.
663
664 // To handle counter-based loop conditions.
666
671
672 // Comparisons that require checking two conditions.
673 if (Subtarget.hasSPE()) {
678 }
691
694
695 if (Subtarget.has64BitSupport()) {
696 // They also have instructions for converting between i64 and fp.
705 // This is just the low 32 bits of a (signed) fp->i64 conversion.
706 // We cannot do this with Promote because i64 is not a legal type.
709
710 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
713 }
714 } else {
715 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
716 if (Subtarget.hasSPE()) {
719 } else {
722 }
723 }
724
725 // With the instructions enabled under FPCVT, we can do everything.
726 if (Subtarget.hasFPCVT()) {
727 if (Subtarget.has64BitSupport()) {
736 }
737
746 }
747
748 if (Subtarget.use64BitRegs()) {
749 // 64-bit PowerPC implementations can support i64 types directly
750 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
751 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
753 // 64-bit PowerPC wants to expand i128 shifts itself.
757 } else {
758 // 32-bit PowerPC wants to expand i64 shifts itself.
762 }
763
764 // PowerPC has better expansions for funnel shifts than the generic
765 // TargetLowering::expandFunnelShift.
766 if (Subtarget.has64BitSupport()) {
769 }
772
773 if (Subtarget.hasVSX()) {
774 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
775 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
776 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
777 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
778 }
779
780 if (Subtarget.hasAltivec()) {
781 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
786 }
787 // First set operation action for all vector types to expand. Then we
788 // will selectively turn on ones that can be effectively codegen'd.
790 // add/sub are legal for all supported vector VT's.
793
794 // For v2i64, these are only valid with P8Vector. This is corrected after
795 // the loop.
796 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
801 }
802 else {
807 }
808
809 if (Subtarget.hasVSX()) {
810 setOperationAction(ISD::FMAXNUM, VT, Legal);
811 setOperationAction(ISD::FMINNUM, VT, Legal);
812 }
813
814 // Vector instructions introduced in P8
815 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
818 }
819 else {
822 }
823
824 // Vector instructions introduced in P9
825 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
827 else
829
830 // We promote all shuffles to v16i8.
832 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
833
834 // We promote all non-typed operations to v4i32.
836 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
838 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
840 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
841 setOperationAction(ISD::LOAD , VT, Promote);
842 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
844 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
847 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
848 setOperationAction(ISD::STORE, VT, Promote);
849 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
850
851 // No other operations are legal.
859 setOperationAction(ISD::FNEG, VT, Expand);
860 setOperationAction(ISD::FSQRT, VT, Expand);
861 setOperationAction(ISD::FLOG, VT, Expand);
862 setOperationAction(ISD::FLOG10, VT, Expand);
863 setOperationAction(ISD::FLOG2, VT, Expand);
864 setOperationAction(ISD::FEXP, VT, Expand);
865 setOperationAction(ISD::FEXP2, VT, Expand);
866 setOperationAction(ISD::FSIN, VT, Expand);
867 setOperationAction(ISD::FCOS, VT, Expand);
868 setOperationAction(ISD::FABS, VT, Expand);
869 setOperationAction(ISD::FFLOOR, VT, Expand);
870 setOperationAction(ISD::FCEIL, VT, Expand);
871 setOperationAction(ISD::FTRUNC, VT, Expand);
872 setOperationAction(ISD::FRINT, VT, Expand);
873 setOperationAction(ISD::FLDEXP, VT, Expand);
874 setOperationAction(ISD::FNEARBYINT, VT, Expand);
885 setOperationAction(ISD::FPOW, VT, Expand);
890
891 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
892 setTruncStoreAction(VT, InnerVT, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
896 }
897 }
899 if (!Subtarget.hasP8Vector()) {
900 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
901 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
903 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
904 }
905
906 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
907 // with merges, splats, etc.
909
910 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
911 // are cheap, so handle them before they get expanded to scalar.
917
918 setOperationAction(ISD::AND , MVT::v4i32, Legal);
919 setOperationAction(ISD::OR , MVT::v4i32, Legal);
920 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
921 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
923 Subtarget.useCRBits() ? Legal : Expand);
924 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
933 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
934 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
935 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
936 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
937
938 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
939 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
940 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
941 if (Subtarget.hasAltivec())
942 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
944 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
945 if (Subtarget.hasP8Altivec())
946 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
947
948 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
951 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
952
953 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
954 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
955
956 if (Subtarget.hasVSX()) {
957 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
958 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
960 }
961
962 if (Subtarget.hasP8Altivec())
963 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
964 else
965 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
966
967 if (Subtarget.isISA3_1()) {
968 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
971 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
972 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
973 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
975 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
977 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
979 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
981 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
983 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
985 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
986 }
987
988 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
989 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
990
993 // LE is P8+/64-bit so direct moves are supported and these operations
994 // are legal. The custom transformation requires 64-bit since we need a
995 // pair of stores that will cover a 128-bit load for P10.
996 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1000 }
1001
1006
1007 // Altivec does not contain unordered floating-point compare instructions
1008 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1011 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1012
1013 if (Subtarget.hasVSX()) {
1016 if (Subtarget.hasP8Vector()) {
1019 }
1020 if (Subtarget.hasDirectMove() && isPPC64) {
1029 }
1031
1032 // The nearbyint variants are not allowed to raise the inexact exception
1033 // so we can only code-gen them with unsafe math.
1034 if (TM.Options.UnsafeFPMath) {
1035 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1036 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1037 }
1038
1039 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1041 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1046 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1047
1048 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1051 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1052 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1053
1054 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1056
1057 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1059
1060 // Share the Altivec comparison restrictions.
1061 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1064 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1065
1066 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1067 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1068
1070
1071 if (Subtarget.hasP8Vector())
1072 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1073
1074 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1075
1076 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1078 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1079
1080 if (Subtarget.hasP8Altivec()) {
1081 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1084
1085 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1086 // SRL, but not for SRA because of the instructions available:
1087 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1088 // doing
1089 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1091 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1092
1093 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1094 }
1095 else {
1096 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1098 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1099
1100 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1101
1102 // VSX v2i64 only supports non-arithmetic operations.
1103 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1104 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1105 }
1106
1107 if (Subtarget.isISA3_1())
1108 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1109 else
1110 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1113 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1114 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1115 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1116
1118
1127
1128 // Custom handling for partial vectors of integers converted to
1129 // floating point. We already have optimal handling for v2i32 through
1130 // the DAG combine, so those aren't necessary.
1147
1148 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1149 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1150 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1151 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1154
1157
1158 // Handle constrained floating-point operations of vector.
1159 // The predictor is `hasVSX` because altivec instruction has
1160 // no exception but VSX vector instruction has.
1174
1188
1189 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1190 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1191
1192 for (MVT FPT : MVT::fp_valuetypes())
1193 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1194
1195 // Expand the SELECT to SELECT_CC
1197
1198 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1199 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1200
1201 // No implementation for these ops for PowerPC.
1202 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1203 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1204 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1205 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1206 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1207 setOperationAction(ISD::FREM, MVT::f128, Expand);
1208 }
1209
1210 if (Subtarget.hasP8Altivec()) {
1211 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1212 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1213 }
1214
1215 if (Subtarget.hasP9Vector()) {
1218
1219 // Test data class instructions store results in CR bits.
1220 if (Subtarget.useCRBits()) {
1225 }
1226
1227 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1228 // SRL, but not for SRA because of the instructions available:
1229 // VS{RL} and VS{RL}O.
1230 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1232 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1233
1234 setOperationAction(ISD::FADD, MVT::f128, Legal);
1235 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1236 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1237 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1239
1240 setOperationAction(ISD::FMA, MVT::f128, Legal);
1247
1248 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1249 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1251 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1252 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1253 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1254
1257 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1258
1259 // Handle constrained floating-point operations of fp128
1275 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1276 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1279 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1280 } else if (Subtarget.hasVSX()) {
1281 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1282 setOperationAction(ISD::STORE, MVT::f128, Promote);
1283
1284 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1285 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1286
1287 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1288 // fp_to_uint and int_to_fp.
1291
1292 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1293 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1294 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1295 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1297 setOperationAction(ISD::FMA, MVT::f128, Expand);
1299
1300 // Expand the fp_extend if the target type is fp128.
1301 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1303
1304 // Expand the fp_round if the source type is fp128.
1305 for (MVT VT : {MVT::f32, MVT::f64}) {
1308 }
1309
1313 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1314
1315 // Lower following f128 select_cc pattern:
1316 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1318
1319 // We need to handle f128 SELECT_CC with integer result type.
1321 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1322 }
1323
1324 if (Subtarget.hasP9Altivec()) {
1325 if (Subtarget.isISA3_1()) {
1330 } else {
1333 }
1341
1342 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1344 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1345 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1346 }
1347
1348 if (Subtarget.hasP10Vector()) {
1350 }
1351 }
1352
1353 if (Subtarget.pairedVectorMemops()) {
1354 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1355 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1356 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1357 }
1358 if (Subtarget.hasMMA()) {
1359 if (Subtarget.isISAFuture())
1360 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1361 else
1362 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1363 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1364 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1366 }
1367
1368 if (Subtarget.has64BitSupport())
1369 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1370
1371 if (Subtarget.isISA3_1())
1372 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1373
1374 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1375
1376 if (!isPPC64) {
1377 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1378 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1379 }
1380
1382 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1383 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1385 }
1386
1388
1389 if (Subtarget.hasAltivec()) {
1390 // Altivec instructions set fields to all zeros or all ones.
1392 }
1393
1396 else if (isPPC64)
1398 else
1400
1401 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1402
1403 // We have target-specific dag combine patterns for the following nodes:
1406 if (Subtarget.hasFPCVT())
1408 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1409 if (Subtarget.useCRBits())
1410 setTargetDAGCombine(ISD::BRCOND);
1413
1415
1417
1418 if (Subtarget.useCRBits()) {
1420 }
1421
1422 setLibcallName(RTLIB::LOG_F128, "logf128");
1423 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1424 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1425 setLibcallName(RTLIB::EXP_F128, "expf128");
1426 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1427 setLibcallName(RTLIB::SIN_F128, "sinf128");
1428 setLibcallName(RTLIB::COS_F128, "cosf128");
1429 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1430 setLibcallName(RTLIB::POW_F128, "powf128");
1431 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1432 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1433 setLibcallName(RTLIB::REM_F128, "fmodf128");
1434 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1435 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1436 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1437 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1438 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1439 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1440 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1441 setLibcallName(RTLIB::RINT_F128, "rintf128");
1442 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1443 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1444 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1445 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1446 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1447
1448 if (Subtarget.isAIXABI()) {
1449 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1450 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1451 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1452 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1453 }
1454
1455 // With 32 condition bits, we don't need to sink (and duplicate) compares
1456 // aggressively in CodeGenPrep.
1457 if (Subtarget.useCRBits()) {
1460 }
1461
1462 // TODO: The default entry number is set to 64. This stops most jump table
1463 // generation on PPC. But it is good for current PPC HWs because the indirect
1464 // branch instruction mtctr to the jump table may lead to bad branch predict.
1465 // Re-evaluate this value on future HWs that can do better with mtctr.
1467
1469
1470 switch (Subtarget.getCPUDirective()) {
1471 default: break;
1472 case PPC::DIR_970:
1473 case PPC::DIR_A2:
1474 case PPC::DIR_E500:
1475 case PPC::DIR_E500mc:
1476 case PPC::DIR_E5500:
1477 case PPC::DIR_PWR4:
1478 case PPC::DIR_PWR5:
1479 case PPC::DIR_PWR5X:
1480 case PPC::DIR_PWR6:
1481 case PPC::DIR_PWR6X:
1482 case PPC::DIR_PWR7:
1483 case PPC::DIR_PWR8:
1484 case PPC::DIR_PWR9:
1485 case PPC::DIR_PWR10:
1486 case PPC::DIR_PWR11:
1490 break;
1491 }
1492
1493 if (Subtarget.enableMachineScheduler())
1495 else
1497
1499
1500 // The Freescale cores do better with aggressive inlining of memcpy and
1501 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1502 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1503 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1504 MaxStoresPerMemset = 32;
1506 MaxStoresPerMemcpy = 32;
1510 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1511 // The A2 also benefits from (very) aggressive inlining of memcpy and
1512 // friends. The overhead of a function call, even when warm, can be
1513 // over one hundred cycles.
1514 MaxStoresPerMemset = 128;
1515 MaxStoresPerMemcpy = 128;
1516 MaxStoresPerMemmove = 128;
1517 MaxLoadsPerMemcmp = 128;
1518 } else {
1521 }
1522
1523 IsStrictFPEnabled = true;
1524
1525 // Let the subtarget (CPU) decide if a predictable select is more expensive
1526 // than the corresponding branch. This information is used in CGP to decide
1527 // when to convert selects into branches.
1529
1531}
1532
1533// *********************************** NOTE ************************************
1534// For selecting load and store instructions, the addressing modes are defined
1535// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1536 // patterns to match the load and store instructions.
1537//
1538// The TD definitions for the addressing modes correspond to their respective
1539// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1540// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1541// address mode flags of a particular node. Afterwards, the computed address
1542// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1543// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1544// accordingly, based on the preferred addressing mode.
1545//
1546// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1547// MemOpFlags contains all the possible flags that can be used to compute the
1548// optimal addressing mode for load and store instructions.
1549// AddrMode contains all the possible load and store addressing modes available
1550// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1551//
1552// When adding new load and store instructions, it is possible that new address
1553// flags may need to be added into MemOpFlags, and a new addressing mode will
1554// need to be added to AddrMode. An entry of the new addressing mode (consisting
1555// of the minimal and main distinguishing address flags for the new load/store
1556// instructions) will need to be added into initializeAddrModeMap() below.
1557// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1558// need to be updated to account for selecting the optimal addressing mode.
1559// *****************************************************************************
1560/// Initialize the map that relates the different addressing modes of the load
1561/// and store instructions to a set of flags. This ensures the load/store
1562/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  // Each AddrModesMap entry lists the flag combinations under which the named
  // loads/stores select that addressing mode.
  // NOTE(review): the MOF_* flag initializer lines for the entries below are
  // elided in this excerpt; only the instruction-name comments remain.
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      // LBZ, LHZ, STB, STH
      // LHA
      // LFS, LFD, STFS, STFD
  };
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA
      // LD, STD
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
  };
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV
  };
  // Prefixed D-Form: register base plus 34-bit signed immediate.
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
  // TODO: Add mapping for quadword load/store.
}
1610
1611/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1612/// the desired ByVal argument alignment.
1613static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1614 if (MaxAlign == MaxMaxAlign)
1615 return;
1616 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1617 if (MaxMaxAlign >= 32 &&
1618 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1619 MaxAlign = Align(32);
1620 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1621 MaxAlign < 16)
1622 MaxAlign = Align(16);
1623 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1624 Align EltAlign;
1625 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1626 if (EltAlign > MaxAlign)
1627 MaxAlign = EltAlign;
1628 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1629 for (auto *EltTy : STy->elements()) {
1630 Align EltAlign;
1631 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1632 if (EltAlign > MaxAlign)
1633 MaxAlign = EltAlign;
1634 if (MaxAlign == MaxMaxAlign)
1635 break;
1636 }
1637 }
1638}
1639
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
// NOTE(review): the opening line of this member's signature is elided in this
// excerpt (it takes the argument type plus the DataLayout below).
                                                  const DataLayout &DL) const {
  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  // With Altivec, nested vectors can raise the requirement up to 16 bytes
  // (see getMaxByValAlign above).
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment.value();
}
1651
1653 return Subtarget.useSoftFloat();
1654}
1655
1657 return Subtarget.hasSPE();
1658}
1659
1661 return VT.isScalarInteger();
1662}
1663
// NOTE(review): the first line of this member's signature is elided in this
// excerpt. Given an integer vector type and an element size, the body reports
// whether a fixed lane index applies and returns that index via \p Index —
// presumably the lane reachable by a PPC64 VSX direct move; confirm.
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  // The fixed lane mapping below only holds for 64-bit targets with VSX.
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
      if (ElemSizeInBits == 32) {
        // 32-bit elements: lane 2 on little-endian, lane 1 on big-endian.
        Index = Subtarget.isLittleEndian() ? 2 : 1;
        return true;
      }
      if (ElemSizeInBits == 64) {
        // 64-bit elements: lane 1 on little-endian, lane 0 on big-endian.
        Index = Subtarget.isLittleEndian() ? 1 : 0;
        return true;
      }
    }
  }
  // Not an integer vector, or an unsupported element size.
  return false;
}
1684
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  // Map each PPC-specific SelectionDAG opcode to a static name string for DAG
  // dumps/debug output; returns nullptr for opcodes with no name.
  // NOTE(review): several `case` labels are elided in this excerpt — each
  // orphaned `return` below belongs to the case named by its string literal.
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
  case PPCISD::XSMINC: return "PPCISD::XSMINC";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::FTSQRT:
    return "PPCISD::FTSQRT";
  case PPCISD::FSQRT:
    return "PPCISD::FSQRT";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
    return "PPCISD::XXSPLTI_SP_TO_DP";
    return "PPCISD::XXSPLTI32DX";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::XXPERM:
    return "PPCISD::XXPERM";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
  case PPCISD::CALL_RM:
    return "PPCISD::CALL_RM";
    return "PPCISD::CALL_NOP_RM";
    return "PPCISD::CALL_NOTOC_RM";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::BCTRL_RM:
    return "PPCISD::BCTRL_RM";
    return "PPCISD::BCTRL_LOAD_TOC_RM";
  case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
    return "PPCISD::ANDI_rec_1_EQ_BIT";
    return "PPCISD::ANDI_rec_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
  case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
  case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
    return "PPCISD::PADDI_DTPREL";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
  case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
  case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
    return "PPCISD::STRICT_FADDRTZ";
    return "PPCISD::STRICT_FCTIDZ";
    return "PPCISD::STRICT_FCTIWZ";
    return "PPCISD::STRICT_FCTIDUZ";
    return "PPCISD::STRICT_FCTIWUZ";
    return "PPCISD::STRICT_FCFID";
    return "PPCISD::STRICT_FCFIDU";
    return "PPCISD::STRICT_FCFIDS";
    return "PPCISD::STRICT_FCFIDUS";
  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
  case PPCISD::STORE_COND:
    return "PPCISD::STORE_COND";
  }
  return nullptr;
}
1857
1859 EVT VT) const {
1860 if (!VT.isVector())
1861 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1862
1864}
1865
1867 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1868 return true;
1869}
1870
1871//===----------------------------------------------------------------------===//
1872// Node matching predicates, for use by the tblgen matching code.
1873//===----------------------------------------------------------------------===//
1874
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
// NOTE(review): the function signature and its leading ConstantFPSDNode check
// are on lines elided from this excerpt; the first return below belongs to
// that check.
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        // A constant-pool load is +/-0.0 iff the pooled FP constant is zero.
        return CFP->getValueAPF().isZero();
  }
  return false;
}
1887
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef mask elements are encoded as negative values and match anything.
  return Op == Val || Op < 0;
}
1893
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
// NOTE(review): the opening line of this function's signature is elided in
// this excerpt.
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // BE, two inputs: the result takes the odd-numbered bytes (i*2+1).
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // LE, two (swapped) inputs: the result takes the even-numbered bytes.
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both halves of the mask must select the same bytes from the
    // single input (even bytes on LE, odd bytes on BE).
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
1924
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
// NOTE(review): the opening line of this function's signature is elided in
// this excerpt.
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // BE, two inputs: each result halfword takes bytes 2..3 of its source
    // word (mask elements i*2+2, i*2+3).
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // LE, two (swapped) inputs: each result halfword takes bytes 0..1.
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both halves of the mask select identical halfwords from the
    // single input.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  return true;
}
1959
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
// NOTE(review): the opening line of this function's signature is elided in
// this excerpt.
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  // VPKUDUM requires POWER8 vector support.
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // BE, two inputs: each result word takes bytes 4..7 of its source
    // doubleword.
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // LE, two (swapped) inputs: each result word takes bytes 0..3.
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both halves of the mask select identical words from the single
    // input.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
2008
2009/// isVMerge - Common function, used to match vmrg* shuffles.
2010///
2011static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2012 unsigned LHSStart, unsigned RHSStart) {
2013 if (N->getValueType(0) != MVT::v16i8)
2014 return false;
2015 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2016 "Unsupported merge size!");
2017
2018 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2019 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2020 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2021 LHSStart+j+i*UnitSize) ||
2022 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2023 RHSStart+j+i*UnitSize))
2024 return false;
2025 }
2026 return true;
2027}
2028
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
// NOTE(review): the opening line of this function's signature is elided in
// this excerpt.
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  // The LHS/RHS start offsets passed to isVMerge differ by endianness:
  // the merged elements come from offset 0 on LE and offset 8 on BE.
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
2053
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
// NOTE(review): the opening line of this function's signature is elided in
// this excerpt.
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  // Mirror image of isVMRGLShuffleMask: the merged elements come from
  // offset 8 on LE and offset 0 on BE.
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
2078
2079/**
2080 * Common function used to match vmrgew and vmrgow shuffles
2081 *
2082 * The indexOffset determines whether to look for even or odd words in
2083 * the shuffle mask. This is based on the endianness of the target
2084 * machine.
2085 * - Little Endian:
2086 * - Use offset of 0 to check for odd elements
2087 * - Use offset of 4 to check for even elements
2088 * - Big Endian:
2089 * - Use offset of 0 to check for even elements
2090 * - Use offset of 4 to check for odd elements
2091 * A detailed description of the vector element ordering for little endian and
2092 * big endian can be found at
2093 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2094 * Targeting your applications - what little endian and big endian IBM XL C/C++
2095 * compiler differences mean to you
2096 *
2097 * The mask to the shuffle vector instruction specifies the indices of the
2098 * elements from the two input vectors to place in the result. The elements are
2099 * numbered in array-access order, starting with the first vector. These vectors
2100 * are always of type v16i8, thus each vector will contain 16 elements of size
2101 * 8. More info on the shuffle vector can be found in the
2102 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2103 * Language Reference.
2104 *
2105 * The RHSStartValue indicates whether the same input vectors are used (unary)
2106 * or two different input vectors are used, based on the following:
2107 * - If the instruction uses the same vector for both inputs, the range of the
2108 * indices will be 0 to 15. In this case, the RHSStart value passed should
2109 * be 0.
2110 * - If the instruction has two different vectors then the range of the
2111 * indices will be 0 to 31. In this case, the RHSStart value passed should
2112 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2113 * to 31 specify elements in the second vector).
2114 *
2115 * \param[in] N The shuffle vector SD Node to analyze
2116 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2117 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2118 * vector to the shuffle_vector instruction
2119 * \return true iff this shuffle vector represents an even or odd word merge
2120 */
2121static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2122 unsigned RHSStartValue) {
2123 if (N->getValueType(0) != MVT::v16i8)
2124 return false;
2125
2126 for (unsigned i = 0; i < 2; ++i)
2127 for (unsigned j = 0; j < 4; ++j)
2128 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2129 i*RHSStartValue+j+IndexOffset) ||
2130 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2131 i*RHSStartValue+j+IndexOffset+8))
2132 return false;
2133 return true;
2134}
2135
/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents the requested even or odd
 *     word merge
 */
// NOTE(review): the opening line of this function's signature is elided in
// this excerpt.
                                unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // On LE, even words sit at byte offset 4 within each doubleword.
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    // On BE, even words sit at byte offset 0.
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  // NOTE(review): unreachable — both branches above always return.
  return false;
}
2172
2173/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2174/// amount, otherwise return -1.
2175/// The ShuffleKind distinguishes between big-endian operations with two
2176/// different inputs (0), either-endian operations with two identical inputs
2177/// (1), and little-endian operations with two different inputs (2). For the
2178/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2179int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2180 SelectionDAG &DAG) {
2181 if (N->getValueType(0) != MVT::v16i8)
2182 return -1;
2183
2185
2186 // Find the first non-undef value in the shuffle mask.
2187 unsigned i;
2188 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2189 /*search*/;
2190
2191 if (i == 16) return -1; // all undef.
2192
2193 // Otherwise, check to see if the rest of the elements are consecutively
2194 // numbered from this value.
2195 unsigned ShiftAmt = SVOp->getMaskElt(i);
2196 if (ShiftAmt < i) return -1;
2197
2198 ShiftAmt -= i;
2199 bool isLE = DAG.getDataLayout().isLittleEndian();
2200
2201 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2202 // Check the rest of the elements to see if they are consecutive.
2203 for (++i; i != 16; ++i)
2204 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2205 return -1;
2206 } else if (ShuffleKind == 1) {
2207 // Check the rest of the elements to see if they are consecutive.
2208 for (++i; i != 16; ++i)
2209 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2210 return -1;
2211 } else
2212 return -1;
2213
2214 if (isLE)
2215 ShiftAmt = 16 - ShiftAmt;
2216
2217 return ShiftAmt;
2218}
2219
2220/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2221/// specifies a splat of a single element that is suitable for input to
2222/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2224 EVT VT = N->getValueType(0);
2225 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2226 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2227
2228 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2229 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2230
2231 // The consecutive indices need to specify an element, not part of two
2232 // different elements. So abandon ship early if this isn't the case.
2233 if (N->getMaskElt(0) % EltSize != 0)
2234 return false;
2235
2236 // This is a splat operation if each element of the permute is the same, and
2237 // if the value doesn't reference the second vector.
2238 unsigned ElementBase = N->getMaskElt(0);
2239
2240 // FIXME: Handle UNDEF elements too!
2241 if (ElementBase >= 16)
2242 return false;
2243
2244 // Check that the indices are consecutive, in the case of a multi-byte element
2245 // splatted with a v16i8 mask.
2246 for (unsigned i = 1; i != EltSize; ++i)
2247 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2248 return false;
2249
2250 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2251 if (N->getMaskElt(i) < 0) continue;
2252 for (unsigned j = 0; j != EltSize; ++j)
2253 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2254 return false;
2255 }
2256 return true;
2257}
2258
2259/// Check that the mask is shuffling N byte elements. Within each N byte
2260/// element of the mask, the indices could be either in increasing or
2261/// decreasing order as long as they are consecutive.
2262/// \param[in] N the shuffle vector SD Node to analyze
2263/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2264/// Word/DoubleWord/QuadWord).
2265/// \param[in] StepLen the delta indices number among the N byte element, if
2266/// the mask is in increasing/decreasing order then it is 1/-1.
2267/// \return true iff the mask is shuffling N byte elements.
2268static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2269 int StepLen) {
2270 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2271 "Unexpected element width.");
2272 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2273
2274 unsigned NumOfElem = 16 / Width;
2275 unsigned MaskVal[16]; // Width is never greater than 16
2276 for (unsigned i = 0; i < NumOfElem; ++i) {
2277 MaskVal[0] = N->getMaskElt(i * Width);
2278 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2279 return false;
2280 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2281 return false;
2282 }
2283
2284 for (unsigned int j = 1; j < Width; ++j) {
2285 MaskVal[j] = N->getMaskElt(i * Width + j);
2286 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2287 return false;
2288 }
2289 }
2290 }
2291
2292 return true;
2293}
2294
2295bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2296 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2297 if (!isNByteElemShuffleMask(N, 4, 1))
2298 return false;
2299
2300 // Now we look at mask elements 0,4,8,12
2301 unsigned M0 = N->getMaskElt(0) / 4;
2302 unsigned M1 = N->getMaskElt(4) / 4;
2303 unsigned M2 = N->getMaskElt(8) / 4;
2304 unsigned M3 = N->getMaskElt(12) / 4;
2305 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2306 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2307
2308 // Below, let H and L be arbitrary elements of the shuffle mask
2309 // where H is in the range [4,7] and L is in the range [0,3].
2310 // H, 1, 2, 3 or L, 5, 6, 7
2311 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2312 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2313 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2314 InsertAtByte = IsLE ? 12 : 0;
2315 Swap = M0 < 4;
2316 return true;
2317 }
2318 // 0, H, 2, 3 or 4, L, 6, 7
2319 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2320 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2321 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2322 InsertAtByte = IsLE ? 8 : 4;
2323 Swap = M1 < 4;
2324 return true;
2325 }
2326 // 0, 1, H, 3 or 4, 5, L, 7
2327 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2328 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2329 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2330 InsertAtByte = IsLE ? 4 : 8;
2331 Swap = M2 < 4;
2332 return true;
2333 }
2334 // 0, 1, 2, H or 4, 5, 6, L
2335 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2336 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2337 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2338 InsertAtByte = IsLE ? 0 : 12;
2339 Swap = M3 < 4;
2340 return true;
2341 }
2342
2343 // If both vector operands for the shuffle are the same vector, the mask will
2344 // contain only elements from the first one and the second one will be undef.
2345 if (N->getOperand(1).isUndef()) {
2346 ShiftElts = 0;
2347 Swap = true;
2348 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2349 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2350 InsertAtByte = IsLE ? 12 : 0;
2351 return true;
2352 }
2353 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2354 InsertAtByte = IsLE ? 8 : 4;
2355 return true;
2356 }
2357 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2358 InsertAtByte = IsLE ? 4 : 8;
2359 return true;
2360 }
2361 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2362 InsertAtByte = IsLE ? 0 : 12;
2363 return true;
2364 }
2365 }
2366
2367 return false;
2368}
2369
2371 bool &Swap, bool IsLE) {
2372 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2373 // Ensure each byte index of the word is consecutive.
2374 if (!isNByteElemShuffleMask(N, 4, 1))
2375 return false;
2376
2377 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2378 unsigned M0 = N->getMaskElt(0) / 4;
2379 unsigned M1 = N->getMaskElt(4) / 4;
2380 unsigned M2 = N->getMaskElt(8) / 4;
2381 unsigned M3 = N->getMaskElt(12) / 4;
2382
2383 // If both vector operands for the shuffle are the same vector, the mask will
2384 // contain only elements from the first one and the second one will be undef.
2385 if (N->getOperand(1).isUndef()) {
2386 assert(M0 < 4 && "Indexing into an undef vector?");
2387 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2388 return false;
2389
2390 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2391 Swap = false;
2392 return true;
2393 }
2394
2395 // Ensure each word index of the ShuffleVector Mask is consecutive.
2396 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2397 return false;
2398
2399 if (IsLE) {
2400 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2401 // Input vectors don't need to be swapped if the leading element
2402 // of the result is one of the 3 left elements of the second vector
2403 // (or if there is no shift to be done at all).
2404 Swap = false;
2405 ShiftElts = (8 - M0) % 8;
2406 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2407 // Input vectors need to be swapped if the leading element
2408 // of the result is one of the 3 left elements of the first vector
2409 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2410 Swap = true;
2411 ShiftElts = (4 - M0) % 4;
2412 }
2413
2414 return true;
2415 } else { // BE
2416 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2417 // Input vectors don't need to be swapped if the leading element
2418 // of the result is one of the 4 elements of the first vector.
2419 Swap = false;
2420 ShiftElts = M0;
2421 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2422 // Input vectors need to be swapped if the leading element
2423 // of the result is one of the 4 elements of the right vector.
2424 Swap = true;
2425 ShiftElts = M0 - 4;
2426 }
2427
2428 return true;
2429 }
2430}
2431
2433 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2434
2435 if (!isNByteElemShuffleMask(N, Width, -1))
2436 return false;
2437
2438 for (int i = 0; i < 16; i += Width)
2439 if (N->getMaskElt(i) != i + Width - 1)
2440 return false;
2441
2442 return true;
2443}
2444
2448
2452
2456
2460
2461/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2462/// if the inputs to the instruction should be swapped and set \p DM to the
2463/// value for the immediate.
2464/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2465/// AND element 0 of the result comes from the first input (LE) or second input
2466/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2467/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2468/// mask.
2470 bool &Swap, bool IsLE) {
2471 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2472
2473 // Ensure each byte index of the double word is consecutive.
2474 if (!isNByteElemShuffleMask(N, 8, 1))
2475 return false;
2476
2477 unsigned M0 = N->getMaskElt(0) / 8;
2478 unsigned M1 = N->getMaskElt(8) / 8;
2479 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2480
2481 // If both vector operands for the shuffle are the same vector, the mask will
2482 // contain only elements from the first one and the second one will be undef.
2483 if (N->getOperand(1).isUndef()) {
2484 if ((M0 | M1) < 2) {
2485 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2486 Swap = false;
2487 return true;
2488 } else
2489 return false;
2490 }
2491
2492 if (IsLE) {
2493 if (M0 > 1 && M1 < 2) {
2494 Swap = false;
2495 } else if (M0 < 2 && M1 > 1) {
2496 M0 = (M0 + 2) % 4;
2497 M1 = (M1 + 2) % 4;
2498 Swap = true;
2499 } else
2500 return false;
2501
2502 // Note: if control flow comes here that means Swap is already set above
2503 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2504 return true;
2505 } else { // BE
2506 if (M0 < 2 && M1 > 1) {
2507 Swap = false;
2508 } else if (M0 > 1 && M1 < 2) {
2509 M0 = (M0 + 2) % 4;
2510 M1 = (M1 + 2) % 4;
2511 Swap = true;
2512 } else
2513 return false;
2514
2515 // Note: if control flow comes here that means Swap is already set above
2516 DM = (M0 << 1) + (M1 & 1);
2517 return true;
2518 }
2519}
2520
2521
2522/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2523/// appropriate for PPC mnemonics (which have a big endian bias - namely
2524/// elements are counted from the left of the vector register).
2525unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2526 SelectionDAG &DAG) {
2528 assert(isSplatShuffleMask(SVOp, EltSize));
2529 EVT VT = SVOp->getValueType(0);
2530
2531 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2532 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2533 : SVOp->getMaskElt(0);
2534
2535 if (DAG.getDataLayout().isLittleEndian())
2536 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2537 else
2538 return SVOp->getMaskElt(0) / EltSize;
2539}
2540
2541/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2542/// by using a vspltis[bhw] instruction of the specified element size, return
2543/// the constant being splatted. The ByteSize field indicates the number of
2544/// bytes of each element [124] -> [bhw].
2546 SDValue OpVal;
2547
2548 // If ByteSize of the splat is bigger than the element size of the
2549 // build_vector, then we have a case where we are checking for a splat where
2550 // multiple elements of the buildvector are folded together into a single
2551 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2552 unsigned EltSize = 16/N->getNumOperands();
2553 if (EltSize < ByteSize) {
2554 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2555 SDValue UniquedVals[4];
2556 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2557
2558 // See if all of the elements in the buildvector agree across.
2559 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2560 if (N->getOperand(i).isUndef()) continue;
2561 // If the element isn't a constant, bail fully out.
2562 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2563
2564 if (!UniquedVals[i&(Multiple-1)].getNode())
2565 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2566 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2567 return SDValue(); // no match.
2568 }
2569
2570 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2571 // either constant or undef values that are identical for each chunk. See
2572 // if these chunks can form into a larger vspltis*.
2573
2574 // Check to see if all of the leading entries are either 0 or -1. If
2575 // neither, then this won't fit into the immediate field.
2576 bool LeadingZero = true;
2577 bool LeadingOnes = true;
2578 for (unsigned i = 0; i != Multiple-1; ++i) {
2579 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2580
2581 LeadingZero &= isNullConstant(UniquedVals[i]);
2582 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2583 }
2584 // Finally, check the least significant entry.
2585 if (LeadingZero) {
2586 if (!UniquedVals[Multiple-1].getNode())
2587 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2588 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2589 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2590 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2591 }
2592 if (LeadingOnes) {
2593 if (!UniquedVals[Multiple-1].getNode())
2594 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2595 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2596 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2597 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2598 }
2599
2600 return SDValue();
2601 }
2602
2603 // Check to see if this buildvec has a single non-undef value in its elements.
2604 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2605 if (N->getOperand(i).isUndef()) continue;
2606 if (!OpVal.getNode())
2607 OpVal = N->getOperand(i);
2608 else if (OpVal != N->getOperand(i))
2609 return SDValue();
2610 }
2611
2612 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2613
2614 unsigned ValSizeInBytes = EltSize;
2615 uint64_t Value = 0;
2616 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2617 Value = CN->getZExtValue();
2618 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2619 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2620 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2621 }
2622
2623 // If the splat value is larger than the element value, then we can never do
2624 // this splat. The only case that we could fit the replicated bits into our
2625 // immediate field for would be zero, and we prefer to use vxor for it.
2626 if (ValSizeInBytes < ByteSize) return SDValue();
2627
2628 // If the element value is larger than the splat value, check if it consists
2629 // of a repeated bit pattern of size ByteSize.
2630 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2631 return SDValue();
2632
2633 // Properly sign extend the value.
2634 int MaskVal = SignExtend32(Value, ByteSize * 8);
2635
2636 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2637 if (MaskVal == 0) return SDValue();
2638
2639 // Finally, if this value fits in a 5 bit sext field, return it
2640 if (SignExtend32<5>(MaskVal) == MaskVal)
2641 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2642 return SDValue();
2643}
2644
2645//===----------------------------------------------------------------------===//
2646// Addressing Mode Selection
2647//===----------------------------------------------------------------------===//
2648
2649/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2650/// or 64-bit immediate, and if the value can be accurately represented as a
2651/// sign extension from a 16-bit value. If so, this returns true and the
2652/// immediate.
2653bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2654 if (!isa<ConstantSDNode>(N))
2655 return false;
2656
2657 Imm = (int16_t)N->getAsZExtVal();
2658 if (N->getValueType(0) == MVT::i32)
2659 return Imm == (int32_t)N->getAsZExtVal();
2660 else
2661 return Imm == (int64_t)N->getAsZExtVal();
2662}
2664 return isIntS16Immediate(Op.getNode(), Imm);
2665}
2666
2667/// Used when computing address flags for selecting loads and stores.
2668/// If we have an OR, check if the LHS and RHS are provably disjoint.
2669/// An OR of two provably disjoint values is equivalent to an ADD.
2670/// Most PPC load/store instructions compute the effective address as a sum,
2671/// so doing this conversion is useful.
2672static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2673 if (N.getOpcode() != ISD::OR)
2674 return false;
2675 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2676 if (!LHSKnown.Zero.getBoolValue())
2677 return false;
2678 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2679 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2680}
2681
2682/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2683/// be represented as an indexed [r+r] operation.
2685 SDValue &Index,
2686 SelectionDAG &DAG) const {
2687 for (SDNode *U : N->uses()) {
2688 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2689 if (Memop->getMemoryVT() == MVT::f64) {
2690 Base = N.getOperand(0);
2691 Index = N.getOperand(1);
2692 return true;
2693 }
2694 }
2695 }
2696 return false;
2697}
2698
2699/// isIntS34Immediate - This method tests if value of node given can be
2700/// accurately represented as a sign extension from a 34-bit value. If so,
2701/// this returns true and the immediate.
2702bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2703 if (!isa<ConstantSDNode>(N))
2704 return false;
2705
2706 Imm = (int64_t)N->getAsZExtVal();
2707 return isInt<34>(Imm);
2708}
2710 return isIntS34Immediate(Op.getNode(), Imm);
2711}
2712
2713/// SelectAddressRegReg - Given the specified addressed, check to see if it
2714/// can be represented as an indexed [r+r] operation. Returns false if it
2715/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2716/// non-zero and N can be represented by a base register plus a signed 16-bit
2717/// displacement, make a more precise judgement by checking (displacement % \p
2718/// EncodingAlignment).
2721 MaybeAlign EncodingAlignment) const {
2722 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2723 // a [pc+imm].
2725 return false;
2726
2727 int16_t Imm = 0;
2728 if (N.getOpcode() == ISD::ADD) {
2729 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2730 // SPE load/store can only handle 8-bit offsets.
2731 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2732 return true;
2733 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2734 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2735 return false; // r+i
2736 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2737 return false; // r+i
2738
2739 Base = N.getOperand(0);
2740 Index = N.getOperand(1);
2741 return true;
2742 } else if (N.getOpcode() == ISD::OR) {
2743 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2744 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2745 return false; // r+i can fold it if we can.
2746
2747 // If this is an or of disjoint bitfields, we can codegen this as an add
2748 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2749 // disjoint.
2750 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2751
2752 if (LHSKnown.Zero.getBoolValue()) {
2753 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2754 // If all of the bits are known zero on the LHS or RHS, the add won't
2755 // carry.
2756 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2757 Base = N.getOperand(0);
2758 Index = N.getOperand(1);
2759 return true;
2760 }
2761 }
2762 }
2763
2764 return false;
2765}
2766
2767// If we happen to be doing an i64 load or store into a stack slot that has
2768// less than a 4-byte alignment, then the frame-index elimination may need to
2769// use an indexed load or store instruction (because the offset may not be a
2770// multiple of 4). The extra register needed to hold the offset comes from the
2771// register scavenger, and it is possible that the scavenger will need to use
2772// an emergency spill slot. As a result, we need to make sure that a spill slot
2773// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2774// stack slot.
2775static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2776 // FIXME: This does not handle the LWA case.
2777 if (VT != MVT::i64)
2778 return;
2779
2780 // NOTE: We'll exclude negative FIs here, which come from argument
2781 // lowering, because there are no known test cases triggering this problem
2782 // using packed structures (or similar). We can remove this exclusion if
2783 // we find such a test case. The reason why this is so test-case driven is
2784 // because this entire 'fixup' is only to prevent crashes (from the
2785 // register scavenger) on not-really-valid inputs. For example, if we have:
2786 // %a = alloca i1
2787 // %b = bitcast i1* %a to i64*
2788 // store i64* a, i64 b
2789 // then the store should really be marked as 'align 1', but is not. If it
2790 // were marked as 'align 1' then the indexed form would have been
2791 // instruction-selected initially, and the problem this 'fixup' is preventing
2792 // won't happen regardless.
2793 if (FrameIdx < 0)
2794 return;
2795
2797 MachineFrameInfo &MFI = MF.getFrameInfo();
2798
2799 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2800 return;
2801
2802 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2803 FuncInfo->setHasNonRISpills();
2804}
2805
2806/// Returns true if the address N can be represented by a base register plus
2807/// a signed 16-bit displacement [r+imm], and if it is not better
2808/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2809/// displacements that are multiples of that value.
2811 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2812 MaybeAlign EncodingAlignment) const {
2813 // FIXME dl should come from parent load or store, not from address
2814 SDLoc dl(N);
2815
2816 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2817 // a [pc+imm].
2819 return false;
2820
2821 // If this can be more profitably realized as r+r, fail.
2822 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2823 return false;
2824
2825 if (N.getOpcode() == ISD::ADD) {
2826 int16_t imm = 0;
2827 if (isIntS16Immediate(N.getOperand(1), imm) &&
2828 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2829 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2830 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2831 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2832 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2833 } else {
2834 Base = N.getOperand(0);
2835 }
2836 return true; // [r+i]
2837 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2838 // Match LOAD (ADD (X, Lo(G))).
2839 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2840 "Cannot handle constant offsets yet!");
2841 Disp = N.getOperand(1).getOperand(0); // The global address.
2846 Base = N.getOperand(0);
2847 return true; // [&g+r]
2848 }
2849 } else if (N.getOpcode() == ISD::OR) {
2850 int16_t imm = 0;
2851 if (isIntS16Immediate(N.getOperand(1), imm) &&
2852 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2853 // If this is an or of disjoint bitfields, we can codegen this as an add
2854 // (for better address arithmetic) if the LHS and RHS of the OR are
2855 // provably disjoint.
2856 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2857
2858 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2859 // If all of the bits are known zero on the LHS or RHS, the add won't
2860 // carry.
2861 if (FrameIndexSDNode *FI =
2862 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2863 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2864 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2865 } else {
2866 Base = N.getOperand(0);
2867 }
2868 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2869 return true;
2870 }
2871 }
2872 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2873 // Loading from a constant address.
2874
2875 // If this address fits entirely in a 16-bit sext immediate field, codegen
2876 // this as "d, 0"
2877 int16_t Imm;
2878 if (isIntS16Immediate(CN, Imm) &&
2879 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2880 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2881 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2882 CN->getValueType(0));
2883 return true;
2884 }
2885
2886 // Handle 32-bit sext immediates with LIS + addr mode.
2887 if ((CN->getValueType(0) == MVT::i32 ||
2888 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2889 (!EncodingAlignment ||
2890 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2891 int Addr = (int)CN->getZExtValue();
2892
2893 // Otherwise, break this down into an LIS + disp.
2894 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2895
2896 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2897 MVT::i32);
2898 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2899 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2900 return true;
2901 }
2902 }
2903
2904 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2906 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2907 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2908 } else
2909 Base = N;
2910 return true; // [r+0]
2911}
2912
2913/// Similar to the 16-bit case but for instructions that take a 34-bit
2914/// displacement field (prefixed loads/stores).
2916 SDValue &Base,
2917 SelectionDAG &DAG) const {
2918 // Only on 64-bit targets.
2919 if (N.getValueType() != MVT::i64)
2920 return false;
2921
2922 SDLoc dl(N);
2923 int64_t Imm = 0;
2924
2925 if (N.getOpcode() == ISD::ADD) {
2926 if (!isIntS34Immediate(N.getOperand(1), Imm))
2927 return false;
2928 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2929 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2930 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2931 else
2932 Base = N.getOperand(0);
2933 return true;
2934 }
2935
2936 if (N.getOpcode() == ISD::OR) {
2937 if (!isIntS34Immediate(N.getOperand(1), Imm))
2938 return false;
2939 // If this is an or of disjoint bitfields, we can codegen this as an add
2940 // (for better address arithmetic) if the LHS and RHS of the OR are
2941 // provably disjoint.
2942 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2943 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2944 return false;
2945 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2946 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2947 else
2948 Base = N.getOperand(0);
2949 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2950 return true;
2951 }
2952
2953 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2954 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2955 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2956 return true;
2957 }
2958
2959 return false;
2960}
2961
2962/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2963/// represented as an indexed [r+r] operation.
2965 SDValue &Index,
2966 SelectionDAG &DAG) const {
2967 // Check to see if we can easily represent this as an [r+r] address. This
2968 // will fail if it thinks that the address is more profitably represented as
2969 // reg+imm, e.g. where imm = 0.
2970 if (SelectAddressRegReg(N, Base, Index, DAG))
2971 return true;
2972
2973 // If the address is the result of an add, we will utilize the fact that the
2974 // address calculation includes an implicit add. However, we can reduce
2975 // register pressure if we do not materialize a constant just for use as the
2976 // index register. We only get rid of the add if it is not an add of a
2977 // value and a 16-bit signed constant and both have a single use.
2978 int16_t imm = 0;
2979 if (N.getOpcode() == ISD::ADD &&
2980 (!isIntS16Immediate(N.getOperand(1), imm) ||
2981 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2982 Base = N.getOperand(0);
2983 Index = N.getOperand(1);
2984 return true;
2985 }
2986
2987 // Otherwise, do it the hard way, using R0 as the base register.
2988 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2989 N.getValueType());
2990 Index = N;
2991 return true;
2992}
2993
2994template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2995 Ty *PCRelCand = dyn_cast<Ty>(N);
2996 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2997}
2998
2999/// Returns true if this address is a PC Relative address.
3000/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3001/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3003 // This is a materialize PC Relative node. Always select this as PC Relative.
3004 Base = N;
3005 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3006 return true;
3011 return true;
3012 return false;
3013}
3014
3015/// Returns true if we should use a direct load into vector instruction
3016/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  // NOTE(review): the declaration of LD (presumably
  // `LoadSDNode *LD = dyn_cast<LoadSDNode>(N);`) appears to be missing from
  // this excerpt -- confirm against the full file.
  if (!LD)
    return false;

  // Only memory types that have a matching direct vector load are candidates:
  // i64 always does, i32 requires Power8 vector support, and i16/i8 require
  // Power9 vector support.
  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  // The loaded value (result 0 of the load) must have exactly one use.
  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  // Every user of the load's value result must be a (possibly permuted)
  // scalar_to_vector; users of other results (e.g. the chain) are fine.
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
        UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
      return false;

  return true;
}
3058
3059/// getPreIndexedAddressParts - returns true by value, base pointer and
3060/// offset pointer and addressing mode by reference if the node's address
3061/// can be legally represented as pre-indexed load / store address.
3063 SDValue &Offset,
3065 SelectionDAG &DAG) const {
3066 if (DisablePPCPreinc) return false;
3067
3068 bool isLoad = true;
3069 SDValue Ptr;
3070 EVT VT;
3071 Align Alignment;
3072 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3073 Ptr = LD->getBasePtr();
3074 VT = LD->getMemoryVT();
3075 Alignment = LD->getAlign();
3076 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3077 Ptr = ST->getBasePtr();
3078 VT = ST->getMemoryVT();
3079 Alignment = ST->getAlign();
3080 isLoad = false;
3081 } else
3082 return false;
3083
3084 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3085 // instructions because we can fold these into a more efficient instruction
3086 // instead, (such as LXSD).
3087 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3088 return false;
3089 }
3090
3091 // PowerPC doesn't have preinc load/store instructions for vectors
3092 if (VT.isVector())
3093 return false;
3094
3095 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3096 // Common code will reject creating a pre-inc form if the base pointer
3097 // is a frame index, or if N is a store and the base pointer is either
3098 // the same as or a predecessor of the value being stored. Check for
3099 // those situations here, and try with swapped Base/Offset instead.
3100 bool Swap = false;
3101
3103 Swap = true;
3104 else if (!isLoad) {
3105 SDValue Val = cast<StoreSDNode>(N)->getValue();
3106 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3107 Swap = true;
3108 }
3109
3110 if (Swap)
3112
3113 AM = ISD::PRE_INC;
3114 return true;
3115 }
3116
3117 // LDU/STU can only handle immediates that are a multiple of 4.
3118 if (VT != MVT::i64) {
3119 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3120 return false;
3121 } else {
3122 // LDU/STU need an address with at least 4-byte alignment.
3123 if (Alignment < Align(4))
3124 return false;
3125
3126 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3127 return false;
3128 }
3129
3130 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3131 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3132 // sext i32 to i64 when addr mode is r+i.
3133 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3134 LD->getExtensionType() == ISD::SEXTLOAD &&
3136 return false;
3137 }
3138
3139 AM = ISD::PRE_INC;
3140 return true;
3141}
3142
3143//===----------------------------------------------------------------------===//
3144// LowerOperation implementation
3145//===----------------------------------------------------------------------===//
3146
3147/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3148/// and LoOpFlags to the target MO flags.
3149static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3150 unsigned &HiOpFlags, unsigned &LoOpFlags,
3151 const GlobalValue *GV = nullptr) {
3152 HiOpFlags = PPCII::MO_HA;
3153 LoOpFlags = PPCII::MO_LO;
3154
3155 // Don't use the pic base if not in PIC relocation model.
3156 if (IsPIC) {
3157 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3158 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3159 }
3160}
3161
3162static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3163 SelectionDAG &DAG) {
3164 SDLoc DL(HiPart);
3165 EVT PtrVT = HiPart.getValueType();
3166 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3167
3168 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3169 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3170
3171 // With PIC, the first instruction is actually "GR+hi(&G)".
3172 if (isPIC)
3173 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3174 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3175
3176 // Generate non-pic code that has direct accesses to the constant pool.
3177 // The address of the global is just (hi(&g)+lo(&g)).
3178 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3179}
3180
3182 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3183 FuncInfo->setUsesTOCBasePtr();
3184}
3185
3189
3190SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3191 SDValue GA) const {
3192 const bool Is64Bit = Subtarget.isPPC64();
3193 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3194 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3195 : Subtarget.isAIXABI()
3196 ? DAG.getRegister(PPC::R2, VT)
3197 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3198 SDValue Ops[] = { GA, Reg };
3199 return DAG.getMemIntrinsicNode(
3200 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3203}
3204
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  // NOTE(review): the declaration of CP (presumably
  // `ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);`) is missing
  // from this excerpt -- confirm against the full file.
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    // With PC-relative addressing there is no TOC load: materialize the
    // constant-pool address directly via MAT_PCREL_ADDR.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit PIC SVR4: load the address through a GOT/TOC entry.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA =
        DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  // Otherwise compute the address as a hi/lo pair combined by LowerLabelRef.
  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
3242
3243// For 64-bit PowerPC, prefer the more compact relative encodings.
3244// This trades 32 bits per jump table entry for one or two instructions
3245// on the jump site.
3252
3255 return false;
3256 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3257 return true;
3259}
3260
3262 SelectionDAG &DAG) const {
3263 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3265
3266 switch (getTargetMachine().getCodeModel()) {
3267 case CodeModel::Small:
3268 case CodeModel::Medium:
3270 default:
3271 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3273 }
3274}
3275
3276const MCExpr *
3278 unsigned JTI,
3279 MCContext &Ctx) const {
3280 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3282
3283 switch (getTargetMachine().getCodeModel()) {
3284 case CodeModel::Small:
3285 case CodeModel::Medium:
3287 default:
3288 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3289 }
3290}
3291
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  // NOTE(review): the declaration of JT (presumably
  // `JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);`) is missing from this
  // excerpt -- confirm against the full file.

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit PIC SVR4: load the jump-table address from the GOT.
  // NOTE(review): the continuation line of this getTargetJumpTable call
  // (presumably `PPCII::MO_PIC_FLAG);`) is missing from this excerpt.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  // Otherwise compute the address as a hi/lo pair combined by LowerLabelRef.
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
3328
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  // NOTE(review): the declaration of BASDN (presumably
  // `BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);`) is missing
  // from this excerpt -- confirm against the full file.
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    // NOTE(review): the continuation line of this getTargetBlockAddress call
    // (presumably `PPCII::MO_PCREL_FLAG);`) is missing from this excerpt.
    SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  // Otherwise compute the address as a hi/lo pair combined by LowerLabelRef.
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
3366
3367SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3368 SelectionDAG &DAG) const {
3369 if (Subtarget.isAIXABI())
3370 return LowerGlobalTLSAddressAIX(Op, DAG);
3371
3372 return LowerGlobalTLSAddressLinux(Op, DAG);
3373}
3374
3375/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3376/// and then apply the update.
3378 SelectionDAG &DAG,
3379 const TargetMachine &TM) {
3380 // Initialize TLS model opt setting lazily:
3381 // (1) Use initial-exec for single TLS var references within current function.
3382 // (2) Use local-dynamic for multiple TLS var references within current
3383 // function.
3384 PPCFunctionInfo *FuncInfo =
3386 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3388 // Iterate over all instructions within current function, collect all TLS
3389 // global variables (global variables taken as the first parameter to
3390 // Intrinsic::threadlocal_address).
3391 const Function &Func = DAG.getMachineFunction().getFunction();
3392 for (const BasicBlock &BB : Func)
3393 for (const Instruction &I : BB)
3394 if (I.getOpcode() == Instruction::Call)
3395 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3396 if (Function *CF = CI->getCalledFunction())
3397 if (CF->isDeclaration() &&
3398 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3399 if (const GlobalValue *GV =
3400 dyn_cast<GlobalValue>(I.getOperand(0))) {
3401 TLSModel::Model GVModel = TM.getTLSModel(GV);
3402 if (GVModel == TLSModel::LocalDynamic)
3403 TLSGV.insert(GV);
3404 }
3405
3406 unsigned TLSGVCnt = TLSGV.size();
3407 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3408 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3409 FuncInfo->setAIXFuncUseTLSIEForLD();
3411 }
3412
3413 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3414 LLVM_DEBUG(
3415 dbgs() << DAG.getMachineFunction().getName()
3416 << " function is using the TLS-IE model for TLS-LD access.\n");
3417 Model = TLSModel::InitialExec;
3418 }
3419}
3420
3421SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3422 SelectionDAG &DAG) const {
3424
3425 if (DAG.getTarget().useEmulatedTLS())
3426 report_fatal_error("Emulated TLS is not yet supported on AIX");
3427
3428 SDLoc dl(GA);
3429 const GlobalValue *GV = GA->getGlobal();
3430 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3431 bool Is64Bit = Subtarget.isPPC64();
3433
3434 // Apply update to the TLS model.
3435 if (Subtarget.hasAIXShLibTLSModelOpt())
3437
3438 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3439
3440 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3441 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3442 bool HasAIXSmallTLSGlobalAttr = false;
3443 SDValue VariableOffsetTGA =
3444 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3445 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3446 SDValue TLSReg;
3447
3448 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3449 if (GVar->hasAttribute("aix-small-tls"))
3450 HasAIXSmallTLSGlobalAttr = true;
3451
3452 if (Is64Bit) {
3453 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3454 // involves a load of the variable offset (from the TOC), followed by an
3455 // add of the loaded variable offset to R13 (the thread pointer).
3456 // This code sequence looks like:
3457 // ld reg1,var[TC](2)
3458 // add reg2, reg1, r13 // r13 contains the thread pointer
3459 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3460
3461 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3462 // global variable attribute, produce a faster access sequence for
3463 // local-exec TLS variables where the offset from the TLS base is encoded
3464 // as an immediate operand.
3465 //
3466 // We only utilize the faster local-exec access sequence when the TLS
3467 // variable has a size within the policy limit. We treat types that are
3468 // not sized or are empty as being over the policy size limit.
3469 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3470 IsTLSLocalExecModel) {
3471 Type *GVType = GV->getValueType();
3472 if (GVType->isSized() && !GVType->isEmptyTy() &&
3473 GV->getDataLayout().getTypeAllocSize(GVType) <=
3475 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3476 }
3477 } else {
3478 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3479 // involves loading the variable offset from the TOC, generating a call to
3480 // .__get_tpointer to get the thread pointer (which will be in R3), and
3481 // adding the two together:
3482 // lwz reg1,var[TC](2)
3483 // bla .__get_tpointer
3484 // add reg2, reg1, r3
3485 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3486
3487 // We do not implement the 32-bit version of the faster access sequence
3488 // for local-exec that is controlled by the -maix-small-local-exec-tls
3489 // option, or the "aix-small-tls" global variable attribute.
3490 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3491 report_fatal_error("The small-local-exec TLS access sequence is "
3492 "currently only supported on AIX (64-bit mode).");
3493 }
3494 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3495 }
3496
3497 if (Model == TLSModel::LocalDynamic) {
3498 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3499
3500 // We do not implement the 32-bit version of the faster access sequence
3501 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3502 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3503 report_fatal_error("The small-local-dynamic TLS access sequence is "
3504 "currently only supported on AIX (64-bit mode).");
3505
3506 // For local-dynamic on AIX, we need to generate one TOC entry for each
3507 // variable offset, and a single module-handle TOC entry for the entire
3508 // file.
3509
3510 SDValue VariableOffsetTGA =
3511 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3512 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3513
3515 GlobalVariable *TLSGV =
3516 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3517 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3519 assert(TLSGV && "Not able to create GV for _$TLSML.");
3520 SDValue ModuleHandleTGA =
3521 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3522 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3523 SDValue ModuleHandle =
3524 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3525
3526 // With the -maix-small-local-dynamic-tls option, produce a faster access
3527 // sequence for local-dynamic TLS variables where the offset from the
3528 // module-handle is encoded as an immediate operand.
3529 //
3530 // We only utilize the faster local-dynamic access sequence when the TLS
3531 // variable has a size within the policy limit. We treat types that are
3532 // not sized or are empty as being over the policy size limit.
3533 if (HasAIXSmallLocalDynamicTLS) {
3534 Type *GVType = GV->getValueType();
3535 if (GVType->isSized() && !GVType->isEmptyTy() &&
3536 GV->getDataLayout().getTypeAllocSize(GVType) <=
3538 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3539 ModuleHandle);
3540 }
3541
3542 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3543 }
3544
3545 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3546 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3547 // need to generate two TOC entries, one for the variable offset, one for the
3548 // region handle. The global address for the TOC entry of the region handle is
3549 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3550 // entry of the variable offset is created with MO_TLSGD_FLAG.
3551 SDValue VariableOffsetTGA =
3552 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3553 SDValue RegionHandleTGA =
3554 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3555 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3556 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3557 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3558 RegionHandle);
3559}
3560
3561SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3562 SelectionDAG &DAG) const {
3563 // FIXME: TLS addresses currently use medium model code sequences,
3564 // which is the most useful form. Eventually support for small and
3565 // large models could be added if users need it, at the cost of
3566 // additional complexity.
3568 if (DAG.getTarget().useEmulatedTLS())
3569 return LowerToTLSEmulatedModel(GA, DAG);
3570
3571 SDLoc dl(GA);
3572 const GlobalValue *GV = GA->getGlobal();
3573 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3574 bool is64bit = Subtarget.isPPC64();
3575 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3576 PICLevel::Level picLevel = M->getPICLevel();
3577
3579 TLSModel::Model Model = TM.getTLSModel(GV);
3580
3581 if (Model == TLSModel::LocalExec) {
3582 if (Subtarget.isUsingPCRelativeCalls()) {
3583 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3584 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3586 SDValue MatAddr =
3587 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3588 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3589 }
3590
3591 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3593 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3595 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3596 : DAG.getRegister(PPC::R2, MVT::i32);
3597
3598 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3599 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3600 }
3601
3602 if (Model == TLSModel::InitialExec) {
3603 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3605 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3606 SDValue TGATLS = DAG.getTargetGlobalAddress(
3607 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3608 SDValue TPOffset;
3609 if (IsPCRel) {
3610 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3611 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3613 } else {
3614 SDValue GOTPtr;
3615 if (is64bit) {
3616 setUsesTOCBasePtr(DAG);
3617 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3618 GOTPtr =
3619 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3620 } else {
3621 if (!TM.isPositionIndependent())
3622 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3623 else if (picLevel == PICLevel::SmallPIC)
3624 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3625 else
3626 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3627 }
3628 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3629 }
3630 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3631 }
3632
3633 if (Model == TLSModel::GeneralDynamic) {
3634 if (Subtarget.isUsingPCRelativeCalls()) {
3635 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3637 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3638 }
3639
3640 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3641 SDValue GOTPtr;
3642 if (is64bit) {
3643 setUsesTOCBasePtr(DAG);
3644 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3645 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3646 GOTReg, TGA);
3647 } else {
3648 if (picLevel == PICLevel::SmallPIC)
3649 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3650 else
3651 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3652 }
3653 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3654 GOTPtr, TGA, TGA);
3655 }
3656
3657 if (Model == TLSModel::LocalDynamic) {
3658 if (Subtarget.isUsingPCRelativeCalls()) {
3659 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3661 SDValue MatPCRel =
3662 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3663 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3664 }
3665
3666 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3667 SDValue GOTPtr;
3668 if (is64bit) {
3669 setUsesTOCBasePtr(DAG);
3670 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3671 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3672 GOTReg, TGA);
3673 } else {
3674 if (picLevel == PICLevel::SmallPIC)
3675 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3676 else
3677 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3678 }
3679 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3680 PtrVT, GOTPtr, TGA, TGA);
3681 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3682 PtrVT, TLSAddr, TGA);
3683 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3684 }
3685
3686 llvm_unreachable("Unknown TLS model!");
3687}
3688
3689SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3690 SelectionDAG &DAG) const {
3691 EVT PtrVT = Op.getValueType();
3693 SDLoc DL(GSDN);
3694 const GlobalValue *GV = GSDN->getGlobal();
3695
3696 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3697 // The actual address of the GlobalValue is stored in the TOC.
3698 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3699 if (Subtarget.isUsingPCRelativeCalls()) {
3700 EVT Ty = getPointerTy(DAG.getDataLayout());
3702 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3704 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3705 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3707 return Load;
3708 } else {
3709 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3711 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3712 }
3713 }
3714 setUsesTOCBasePtr(DAG);
3715 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3716 return getTOCEntry(DAG, DL, GA);
3717 }
3718
3719 unsigned MOHiFlag, MOLoFlag;
3720 bool IsPIC = isPositionIndependent();
3721 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3722
3723 if (IsPIC && Subtarget.isSVR4ABI()) {
3724 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3725 GSDN->getOffset(),
3727 return getTOCEntry(DAG, DL, GA);
3728 }
3729
3730 SDValue GAHi =
3731 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3732 SDValue GALo =
3733 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3734
3735 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3736}
3737
/// Lower ISD::SETCC / STRICT_FSETCC(S): soften f128 comparisons to libcalls,
/// special-case v2i64 equality via v4i32 Altivec compares, and rewrite integer
/// eq/ne comparisons as xor-against-zero to expose bit-twiddling combines.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  // NOTE(review): the start of the CC declaration (presumably
  // `ISD::CondCode CC =`) is missing from this excerpt -- confirm against
  // the full file.
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
  // Strict FP compares carry the chain as operand 0, shifting the operands.
  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        Op->getOpcode() == ISD::STRICT_FSETCCS);
    if (RHS.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
                        DAG.getCondCode(CC));
    if (IsStrict)
      return DAG.getMergeValues({LHS, Chain}, dl);
    return LHS;
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (LHS.getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC != ISD::SETEQ && CC != ISD::SETNE)
        return SDValue();
      SDValue SetCC32 = DAG.getSetCC(
          dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
          DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
      // Combine each 32-bit half's result with its swapped neighbor so both
      // halves of each 64-bit lane agree.
      int ShuffV[] = {1, 0, 3, 2};
      SDValue Shuff =
          DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
      // NOTE(review): the DAG.getNode line combining Shuff and SetCC32 is
      // missing from this excerpt -- confirm against the full file.
      return DAG.getBitcast(MVT::v2i64,
                            dl, MVT::v4i32, Shuff, SetCC32));
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  // NOTE(review): the enclosing `if (ConstantSDNode *C = ...)` line is
  // missing from this excerpt -- confirm against the full file.
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized. FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnes() || C->isZero())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit. The
  // normal approach here uses sub to do this instead of xor. Using xor exposes
  // the result to other bit-twiddling opportunities.
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
3813
/// Lower ISD::VAARG for 32-bit SVR4.  The va_list struct stores a one-byte
/// gpr index, a one-byte fpr index (with padding), an overflow-area pointer
/// at offset 4 and a register-save-area pointer at offset 8; va_arg reads
/// from the register save area while registers remain, otherwise from the
/// overflow (stack) area, and updates the indices/pointers accordingly.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: first byte of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't, so the i64 is read from an
    // aligned register pair.
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // Register save area pointer lives at offset 8 in the va_list.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  // Overflow area pointer lives at offset 4 in the va_list.
  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // CC is true while the relevant index is still < 8, i.e. a register slot
  // remains; otherwise the overflow area is used.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8 (GPR slots are 4 bytes, FPR slots 8).
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea (past the 8 x 4-byte GPRs).
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the bumped index back into the va_list (byte 0 for gpr, byte 1 for
  // fpr).
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr >= 8 (i.e. the argument was
  // taken from the overflow area).
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument value from whichever area was selected.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3912
// Lower ISD::VACOPY for the 32-bit SVR4 ABI. Operands: 0 = chain,
// 1 = destination va_list pointer, 2 = source va_list pointer. The whole
// va_list struct is copied with a single memcpy node.
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
                       false, true, /*CI=*/nullptr, std::nullopt,
}
3923
3924SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3925 SelectionDAG &DAG) const {
3926 if (Subtarget.isAIXABI())
3927 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3928
3929 return Op.getOperand(0);
3930}
3931
// Lower an INLINEASM / INLINEASM_BR node. The node itself is returned
// unchanged; the only effect is bookkeeping: if any register operand of the
// asm names the link register (LR or LR8), mark in MFI that an LR store is
// required for this function.
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is no point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  // Operands are encoded as a flag word (operand-kind + register count)
  // followed by that many values.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
      i += NumVals;
      break;
      for (; NumVals; --NumVals, ++i) {
        Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        // The asm touches the link register: force an LR save and stop
        // scanning.
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}
3982
// Lower ISD::INIT_TRAMPOLINE by emitting a call to the runtime helper
// __trampoline_setup(Trmp, TrampSize, FPtr, Nest). The chain result of that
// call is returned. Not supported on AIX.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());


  // All four call arguments are passed as pointer-sized integers.
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  // Only the chain of the call is needed; the helper returns no value we use.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
4021
// Lower ISD::VASTART. Operands: 0 = chain, 1 = pointer to the va_list object,
// 2 = SrcValue node for that pointer. On PPC64 and AIX, va_start is a single
// store of the vararg frame address; for 32-bit SVR4 the four fields of the
// va_list struct (documented below) are initialized one by one.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Pointer-sized step (4 on PPC32): overflow_arg_area -> reg_save_area.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  // Step from the fpr byte (offset 1) to overflow_arg_area (offset 4).
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  // Step from the gpr byte (offset 0) to the fpr byte (offset 1).
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
4105
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX. These are the thirteen argument-passing FPRs F1-F13.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
4111
4112/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4113/// the stack.
4114static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4115 unsigned PtrByteSize) {
4116 unsigned ArgSize = ArgVT.getStoreSize();
4117 if (Flags.isByVal())
4118 ArgSize = Flags.getByValSize();
4119
4120 // Round up to multiples of the pointer size, except for array members,
4121 // which are always packed.
4122 if (!Flags.isInConsecutiveRegs())
4123 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4124
4125 return ArgSize;
4126}
4127
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
                                           ISD::ArgFlagsTy Flags,
                                           unsigned PtrByteSize) {
  // Arguments are at least pointer-aligned by default.
  Align Alignment(PtrByteSize);

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Alignment = Align(16);

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    // Only honor the request if it exceeds (and is a multiple of) the
    // default pointer alignment.
    if (BVAlign > PtrByteSize) {
      if (BVAlign.value() % PtrByteSize != 0)
            "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Alignment = Align(OrigVT.getStoreSize());
    else
      Alignment = Align(ArgVT.getStoreSize());
  }

  return Alignment;
}
4167
4168/// CalculateStackSlotUsed - Return whether this argument will use its
4169/// stack slot (instead of being passed in registers). ArgOffset,
4170/// AvailableFPRs, and AvailableVRs must hold the current argument
4171/// position, and will be updated to account for this argument.
4172static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4173 unsigned PtrByteSize, unsigned LinkageSize,
4174 unsigned ParamAreaSize, unsigned &ArgOffset,
4175 unsigned &AvailableFPRs,
4176 unsigned &AvailableVRs) {
4177 bool UseMemory = false;
4178
4179 // Respect alignment of argument on the stack.
4180 Align Alignment =
4181 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4182 ArgOffset = alignTo(ArgOffset, Alignment);
4183 // If there's no space left in the argument save area, we must
4184 // use memory (this check also catches zero-sized arguments).
4185 if (ArgOffset >= LinkageSize + ParamAreaSize)
4186 UseMemory = true;
4187
4188 // Allocate argument on the stack.
4189 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4190 if (Flags.isInConsecutiveRegsLast())
4191 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4192 // If we overran the argument save area, we must use memory
4193 // (this check catches arguments passed partially in memory)
4194 if (ArgOffset > LinkageSize + ParamAreaSize)
4195 UseMemory = true;
4196
4197 // However, if the argument is actually passed in an FPR or a VR,
4198 // we don't use memory after all.
4199 if (!Flags.isByVal()) {
4200 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4201 if (AvailableFPRs > 0) {
4202 --AvailableFPRs;
4203 return false;
4204 }
4205 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4206 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4207 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4208 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4209 if (AvailableVRs > 0) {
4210 --AvailableVRs;
4211 return false;
4212 }
4213 }
4214
4215 return UseMemory;
4216}
4217
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
                                     unsigned NumBytes) {
  // The required alignment is supplied by the target's frame lowering.
  return alignTo(NumBytes, Lowering->getStackAlign());
}
4224
4225SDValue PPCTargetLowering::LowerFormalArguments(
4226 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4227 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4228 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4229 if (Subtarget.isAIXABI())
4230 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4231 InVals);
4232 if (Subtarget.is64BitELFABI())
4233 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4234 InVals);
4235 assert(Subtarget.is32BitELFABI());
4236 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4237 InVals);
4238}
4239
// Lower incoming formal arguments for the 32-bit SVR4 (ELF) ABI: copy
// register-passed values into virtual registers, create fixed stack objects
// for memory-passed values, and, for variadic functions, spill the unused
// argument GPRs/FPRs so va_arg can find them.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Pick the register class matching the value type (and the available
      // subtarget features).
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // An SPE double arrives split across two consecutive 32-bit GPRs;
        // reassemble it (register order depends on endianness).
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        // i1 values are carried in a full i32 register and truncated here.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    // With soft-float or SPE, no dedicated FP argument registers exist.
    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Chain together all vararg register spills (if any) with the entry chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4488
4489// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4490// value to MVT::i64 and then truncate to the correct register size.
4491SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4492 EVT ObjectVT, SelectionDAG &DAG,
4493 SDValue ArgVal,
4494 const SDLoc &dl) const {
4495 if (Flags.isSExt())
4496 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4497 DAG.getValueType(ObjectVT));
4498 else if (Flags.isZExt())
4499 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4500 DAG.getValueType(ObjectVT));
4501
4502 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4503}
4504
4505SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4506 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4507 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4508 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4509 // TODO: add description of PPC stack frame format, or at least some docs.
4510 //
4511 bool isELFv2ABI = Subtarget.isELFv2ABI();
4512 bool isLittleEndian = Subtarget.isLittleEndian();
4514 MachineFrameInfo &MFI = MF.getFrameInfo();
4515 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4516
4517 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4518 "fastcc not supported on varargs functions");
4519
4520 EVT PtrVT = getPointerTy(MF.getDataLayout());
4521 // Potential tail calls could cause overwriting of argument stack slots.
4522 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4523 (CallConv == CallingConv::Fast));
4524 unsigned PtrByteSize = 8;
4525 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4526
4527 static const MCPhysReg GPR[] = {
4528 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4529 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4530 };
4531 static const MCPhysReg VR[] = {
4532 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4533 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4534 };
4535
4536 const unsigned Num_GPR_Regs = std::size(GPR);
4537 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4538 const unsigned Num_VR_Regs = std::size(VR);
4539
4540 // Do a first pass over the arguments to determine whether the ABI
4541 // guarantees that our caller has allocated the parameter save area
4542 // on its stack frame. In the ELFv1 ABI, this is always the case;
4543 // in the ELFv2 ABI, it is true if this is a vararg function or if
4544 // any parameter is located in a stack slot.
4545
4546 bool HasParameterArea = !isELFv2ABI || isVarArg;
4547 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4548 unsigned NumBytes = LinkageSize;
4549 unsigned AvailableFPRs = Num_FPR_Regs;
4550 unsigned AvailableVRs = Num_VR_Regs;
4551 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4552 if (Ins[i].Flags.isNest())
4553 continue;
4554
4555 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4556 PtrByteSize, LinkageSize, ParamAreaSize,
4557 NumBytes, AvailableFPRs, AvailableVRs))
4558 HasParameterArea = true;
4559 }
4560
4561 // Add DAG nodes to load the arguments or copy them out of registers. On
4562 // entry to a function on PPC, the arguments start after the linkage area,
4563 // although the first ones are often in registers.
4564
4565 unsigned ArgOffset = LinkageSize;
4566 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4569 unsigned CurArgIdx = 0;
4570 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4571 SDValue ArgVal;
4572 bool needsLoad = false;
4573 EVT ObjectVT = Ins[ArgNo].VT;
4574 EVT OrigVT = Ins[ArgNo].ArgVT;
4575 unsigned ObjSize = ObjectVT.getStoreSize();
4576 unsigned ArgSize = ObjSize;
4577 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4578 if (Ins[ArgNo].isOrigArg()) {
4579 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4580 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4581 }
4582 // We re-align the argument offset for each argument, except when using the
4583 // fast calling convention, when we need to make sure we do that only when
4584 // we'll actually use a stack slot.
4585 unsigned CurArgOffset;
4586 Align Alignment;
4587 auto ComputeArgOffset = [&]() {
4588 /* Respect alignment of argument on the stack. */
4589 Alignment =
4590 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4591 ArgOffset = alignTo(ArgOffset, Alignment);
4592 CurArgOffset = ArgOffset;
4593 };
4594
4595 if (CallConv != CallingConv::Fast) {
4596 ComputeArgOffset();
4597
4598 /* Compute GPR index associated with argument offset. */
4599 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4600 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4601 }
4602
4603 // FIXME the codegen can be much improved in some cases.
4604 // We do not have to keep everything in memory.
4605 if (Flags.isByVal()) {
4606 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4607
4608 if (CallConv == CallingConv::Fast)
4609 ComputeArgOffset();
4610
4611 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4612 ObjSize = Flags.getByValSize();
4613 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4614 // Empty aggregate parameters do not take up registers. Examples:
4615 // struct { } a;
4616 // union { } b;
4617 // int c[0];
4618 // etc. However, we have to provide a place-holder in InVals, so
4619 // pretend we have an 8-byte item at the current address for that
4620 // purpose.
4621 if (!ObjSize) {
4622 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4623 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4624 InVals.push_back(FIN);
4625 continue;
4626 }
4627
4628 // Create a stack object covering all stack doublewords occupied
4629 // by the argument. If the argument is (fully or partially) on
4630 // the stack, or if the argument is fully in registers but the
4631 // caller has allocated the parameter save anyway, we can refer
4632 // directly to the caller's stack frame. Otherwise, create a
4633 // local copy in our own frame.
4634 int FI;
4635 if (HasParameterArea ||
4636 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4637 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4638 else
4639 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4640 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4641
4642 // Handle aggregates smaller than 8 bytes.
4643 if (ObjSize < PtrByteSize) {
4644 // The value of the object is its address, which differs from the
4645 // address of the enclosing doubleword on big-endian systems.
4646 SDValue Arg = FIN;
4647 if (!isLittleEndian) {
4648 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4649 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4650 }
4651 InVals.push_back(Arg);
4652
4653 if (GPR_idx != Num_GPR_Regs) {
4654 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4655 FuncInfo->addLiveInAttr(VReg, Flags);
4656 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4657 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4658 SDValue Store =
4659 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4660 MachinePointerInfo(&*FuncArg), ObjType);
4661 MemOps.push_back(Store);
4662 }
4663 // Whether we copied from a register or not, advance the offset
4664 // into the parameter save area by a full doubleword.
4665 ArgOffset += PtrByteSize;
4666 continue;
4667 }
4668
4669 // The value of the object is its address, which is the address of
4670 // its first stack doubleword.
4671 InVals.push_back(FIN);
4672
4673 // Store whatever pieces of the object are in registers to memory.
4674 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4675 if (GPR_idx == Num_GPR_Regs)
4676 break;
4677
4678 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4679 FuncInfo->addLiveInAttr(VReg, Flags);
4680 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4681 SDValue Addr = FIN;
4682 if (j) {
4683 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4684 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4685 }
4686 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4687 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4688 SDValue Store =
4689 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4690 MachinePointerInfo(&*FuncArg, j), ObjType);
4691 MemOps.push_back(Store);
4692 ++GPR_idx;
4693 }
4694 ArgOffset += ArgSize;
4695 continue;
4696 }
4697
4698 switch (ObjectVT.getSimpleVT().SimpleTy) {
4699 default: llvm_unreachable("Unhandled argument type!");
4700 case MVT::i1:
4701 case MVT::i32:
4702 case MVT::i64:
4703 if (Flags.isNest()) {
4704 // The 'nest' parameter, if any, is passed in R11.
4705 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4706 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4707
4708 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4709 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4710
4711 break;
4712 }
4713
4714 // These can be scalar arguments or elements of an integer array type
4715 // passed directly. Clang may use those instead of "byval" aggregate
4716 // types to avoid forcing arguments to memory unnecessarily.
4717 if (GPR_idx != Num_GPR_Regs) {
4718 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4719 FuncInfo->addLiveInAttr(VReg, Flags);
4720 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4721
4722 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4723 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4724 // value to MVT::i64 and then truncate to the correct register size.
4725 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4726 } else {
4727 if (CallConv == CallingConv::Fast)
4728 ComputeArgOffset();
4729
4730 needsLoad = true;
4731 ArgSize = PtrByteSize;
4732 }
4733 if (CallConv != CallingConv::Fast || needsLoad)
4734 ArgOffset += 8;
4735 break;
4736
4737 case MVT::f32:
4738 case MVT::f64:
4739 // These can be scalar arguments or elements of a float array type
4740 // passed directly. The latter are used to implement ELFv2 homogenous
4741 // float aggregates.
4742 if (FPR_idx != Num_FPR_Regs) {
4743 unsigned VReg;
4744
4745 if (ObjectVT == MVT::f32)
4746 VReg = MF.addLiveIn(FPR[FPR_idx],
4747 Subtarget.hasP8Vector()
4748 ? &PPC::VSSRCRegClass
4749 : &PPC::F4RCRegClass);
4750 else
4751 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4752 ? &PPC::VSFRCRegClass
4753 : &PPC::F8RCRegClass);
4754
4755 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4756 ++FPR_idx;
4757 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4758 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4759 // once we support fp <-> gpr moves.
4760
4761 // This can only ever happen in the presence of f32 array types,
4762 // since otherwise we never run out of FPRs before running out
4763 // of GPRs.
4764 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4765 FuncInfo->addLiveInAttr(VReg, Flags);
4766 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4767
4768 if (ObjectVT == MVT::f32) {
4769 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4770 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4771 DAG.getConstant(32, dl, MVT::i32));
4772 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4773 }
4774
4775 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4776 } else {
4777 if (CallConv == CallingConv::Fast)
4778 ComputeArgOffset();
4779
4780 needsLoad = true;
4781 }
4782
4783 // When passing an array of floats, the array occupies consecutive
4784 // space in the argument area; only round up to the next doubleword
4785 // at the end of the array. Otherwise, each float takes 8 bytes.
4786 if (CallConv != CallingConv::Fast || needsLoad) {
4787 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4788 ArgOffset += ArgSize;
4789 if (Flags.isInConsecutiveRegsLast())
4790 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4791 }
4792 break;
4793 case MVT::v4f32:
4794 case MVT::v4i32:
4795 case MVT::v8i16:
4796 case MVT::v16i8:
4797 case MVT::v2f64:
4798 case MVT::v2i64:
4799 case MVT::v1i128:
4800 case MVT::f128:
4801 // These can be scalar arguments or elements of a vector array type
4802 // passed directly. The latter are used to implement ELFv2 homogenous
4803 // vector aggregates.
4804 if (VR_idx != Num_VR_Regs) {
4805 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4806 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4807 ++VR_idx;
4808 } else {
4809 if (CallConv == CallingConv::Fast)
4810 ComputeArgOffset();
4811 needsLoad = true;
4812 }
4813 if (CallConv != CallingConv::Fast || needsLoad)
4814 ArgOffset += 16;
4815 break;
4816 }
4817
4818 // We need to load the argument to a virtual register if we determined
4819 // above that we ran out of physical registers of the appropriate type.
4820 if (needsLoad) {
4821 if (ObjSize < ArgSize && !isLittleEndian)
4822 CurArgOffset += ArgSize - ObjSize;
4823 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4824 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4825 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4826 }
4827
4828 InVals.push_back(ArgVal);
4829 }
4830
4831 // Area that is at least reserved in the caller of this function.
4832 unsigned MinReservedArea;
4833 if (HasParameterArea)
4834 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4835 else
4836 MinReservedArea = LinkageSize;
4837
4838 // Set the size that is at least reserved in caller of this function. Tail
4839 // call optimized functions' reserved stack space needs to be aligned so that
4840 // taking the difference between two stack areas will result in an aligned
4841 // stack.
4842 MinReservedArea =
4843 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4844 FuncInfo->setMinReservedArea(MinReservedArea);
4845
4846 // If the function takes variable number of arguments, make a frame index for
4847 // the start of the first vararg value... for expansion of llvm.va_start.
4848 // On ELFv2ABI spec, it writes:
4849 // C programs that are intended to be *portable* across different compilers
4850 // and architectures must use the header file <stdarg.h> to deal with variable
4851 // argument lists.
4852 if (isVarArg && MFI.hasVAStart()) {
4853 int Depth = ArgOffset;
4854
4855 FuncInfo->setVarArgsFrameIndex(
4856 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4857 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4858
4859 // If this function is vararg, store any remaining integer argument regs
4860 // to their spots on the stack so that they may be loaded by dereferencing
4861 // the result of va_next.
4862 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4863 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4864 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4865 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4866 SDValue Store =
4867 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4868 MemOps.push_back(Store);
4869 // Increment the address by four for the next argument to store
4870 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4871 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4872 }
4873 }
4874
4875 if (!MemOps.empty())
4876 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4877
4878 return Chain;
4879}
4880
4881/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4882/// adjusted to accommodate the arguments for the tailcall.
4883static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4884 unsigned ParamSize) {
4885
// Non-tail calls never need a stack-pointer adjustment.
4886 if (!isTailCall) return 0;
// NOTE(review): original line 4888 is missing from this extraction; it
// presumably declares FI (the caller's function info object, fetched from
// the MachineFunction), which is used below -- confirm against upstream.
4889 unsigned CallerMinReservedArea = FI->getMinReservedArea();
// Positive SPDiff: the caller's reserved area already covers the callee's
// parameters; negative: the callee needs that many extra bytes.
4890 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4891 // Remember only if the new adjustment is bigger.
4892 if (SPDiff < FI->getTailCallSPDelta())
4893 FI->setTailCallSPDelta(SPDiff);
4894
4895 return SPDiff;
4896}
4897
4898static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4899
4900static bool callsShareTOCBase(const Function *Caller,
4901 const GlobalValue *CalleeGV,
4902 const TargetMachine &TM) {
4903 // It does not make sense to call callsShareTOCBase() with a caller that
4904 // is PC Relative since PC Relative callers do not have a TOC.
4905#ifndef NDEBUG
4906 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4907 assert(!STICaller->isUsingPCRelativeCalls() &&
4908 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4909#endif
4910
4911 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4912 // don't have enough information to determine if the caller and callee share
4913 // the same TOC base, so we have to pessimistically assume they don't for
4914 // correctness.
4915 if (!CalleeGV)
4916 return false;
4917
4918 // If the callee is preemptable, then the static linker will use a plt-stub
4919 // which saves the toc to the stack, and needs a nop after the call
4920 // instruction to convert to a toc-restore.
4921 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4922 return false;
4923
4924 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4925 // We may need a TOC restore in the situation where the caller requires a
4926 // valid TOC but the callee is PC Relative and does not.
4927 const Function *F = dyn_cast<Function>(CalleeGV);
4928 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4929
4930 // If we have an Alias we can try to get the function from there.
4931 if (Alias) {
4932 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4933 F = dyn_cast<Function>(GlobalObj);
4934 }
4935
4936 // If we still have no valid function pointer we do not have enough
4937 // information to determine if the callee uses PC Relative calls so we must
4938 // assume that it does.
4939 if (!F)
4940 return false;
4941
4942 // If the callee uses PC Relative we cannot guarantee that the callee won't
4943 // clobber the TOC of the caller and so we must assume that the two
4944 // functions do not share a TOC base.
4945 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4946 if (STICallee->isUsingPCRelativeCalls())
4947 return false;
4948
4949 // If the GV is not a strong definition then we need to assume it can be
4950 // replaced by another function at link time. The function that replaces
4951 // it may not share the same TOC as the caller since the callee may be
4952 // replaced by a PC Relative version of the same function.
4953 if (!CalleeGV->isStrongDefinitionForLinker())
4954 return false;
4955
4956 // The medium and large code models are expected to provide a sufficiently
4957 // large TOC to provide all data addressing needs of a module with a
4958 // single TOC.
4959 if (CodeModel::Medium == TM.getCodeModel() ||
4960 CodeModel::Large == TM.getCodeModel())
4961 return true;
4962
4963 // Any explicitly-specified sections and section prefixes must also match.
4964 // Also, if we're using -ffunction-sections, then each function is always in
4965 // a different section (the same is true for COMDAT functions).
4966 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4967 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4968 return false;
4969 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4970 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4971 return false;
4972 }
4973
4974 return true;
4975}
4976
// Scans the outgoing arguments of a 64-bit ELF call and reports whether any
// of them would require a slot in the stack parameter save area (i.e. the
// argument registers alone do not suffice), per CalculateStackSlotUsed.
4977static bool
// NOTE(review): original line 4978 is missing from this extraction; it
// presumably reads "needStackSlotPassParameters(const PPCSubtarget &Subtarget,"
// -- confirm against upstream.
4979 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4980 assert(Subtarget.is64BitELFABI());
4981
4982 const unsigned PtrByteSize = 8;
4983 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4984
// Argument-passing register files for the 64-bit ELF calling convention.
4985 static const MCPhysReg GPR[] = {
4986 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4987 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4988 };
4989 static const MCPhysReg VR[] = {
4990 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4991 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4992 };
4993
4994 const unsigned NumGPRs = std::size(GPR);
4995 const unsigned NumFPRs = 13;
4996 const unsigned NumVRs = std::size(VR);
4997 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4998
4999 unsigned NumBytes = LinkageSize;
5000 unsigned AvailableFPRs = NumFPRs;
5001 unsigned AvailableVRs = NumVRs;
5002
// The 'nest' parameter travels in a dedicated register and never consumes a
// parameter-save-area slot, so it is skipped.
5003 for (const ISD::OutputArg& Param : Outs) {
5004 if (Param.Flags.isNest()) continue;
5005
5006 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5007 LinkageSize, ParamAreaSize, NumBytes,
5008 AvailableFPRs, AvailableVRs))
5009 return true;
5010 }
5011 return false;
5012}
5013
5014static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5015 if (CB.arg_size() != CallerFn->arg_size())
5016 return false;
5017
5018 auto CalleeArgIter = CB.arg_begin();
5019 auto CalleeArgEnd = CB.arg_end();
5020 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5021
5022 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5023 const Value* CalleeArg = *CalleeArgIter;
5024 const Value* CallerArg = &(*CallerArgIter);
5025 if (CalleeArg == CallerArg)
5026 continue;
5027
5028 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5029 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5030 // }
5031 // 1st argument of callee is undef and has the same type as caller.
5032 if (CalleeArg->getType() == CallerArg->getType() &&
5033 isa<UndefValue>(CalleeArg))
5034 continue;
5035
5036 return false;
5037 }
5038
5039 return true;
5040}
5041
5042// Returns true if TCO is possible between the callers and callees
5043// calling conventions.
5044static bool
// NOTE(review): original line 5045 is missing from this extraction; it
// presumably reads
// "areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,"
// -- confirm against upstream.
5046 CallingConv::ID CalleeCC) {
5047 // Tail calls are possible with fastcc and ccc.
5048 auto isTailCallableCC = [] (CallingConv::ID CC){
5049 return CC == CallingConv::C || CC == CallingConv::Fast;
5050 };
// Both ends of the call must use a tail-callable convention.
5051 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5052 return false;
5053
5054 // We can safely tail call both fastcc and ccc callees from a c calling
5055 // convention caller. If the caller is fastcc, we may have less stack space
5056 // than a non-fastcc caller with the same signature so disable tail-calls in
5057 // that case.
5058 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5059}
5060
// Decides whether a 64-bit SVR4 call can be lowered as a tail call (TCO) or
// sibling call (SCO): rejects variadic and byval cases, requires compatible
// calling conventions, and -- absent PC Relative addressing -- a provably
// shared TOC base between caller and callee.
5061bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5062 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5063 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
// NOTE(review): original line 5064 is missing from this extraction; it
// presumably declares the Outs parameter
// ("const SmallVectorImpl<ISD::OutputArg> &Outs,") used below -- confirm
// against upstream.
5065 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5066 bool isCalleeExternalSymbol) const {
5067 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5068
// With SCO disabled and no guaranteed-TCO request there is nothing to do.
5069 if (DisableSCO && !TailCallOpt) return false;
5070
5071 // Variadic argument functions are not supported.
5072 if (isVarArg) return false;
5073
5074 // Check that the calling conventions are compatible for tco.
5075 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5076 return false;
5077
5078 // Caller contains any byval parameter is not supported.
5079 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5080 return false;
5081
5082 // Callee contains any byval parameter is not supported, too.
5083 // Note: This is a quick work around, because in some cases, e.g.
5084 // caller's stack size > callee's stack size, we are still able to apply
5085 // sibling call optimization. For example, gcc is able to do SCO for caller1
5086 // in the following example, but not for caller2.
5087 // struct test {
5088 // long int a;
5089 // char ary[56];
5090 // } gTest;
5091 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5092 // b->a = v.a;
5093 // return 0;
5094 // }
5095 // void caller1(struct test a, struct test c, struct test *b) {
5096 // callee(gTest, b); }
5097 // void caller2(struct test *b) { callee(gTest, b); }
5098 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5099 return false;
5100
5101 // If callee and caller use different calling conventions, we cannot pass
5102 // parameters on stack since offsets for the parameter area may be different.
5103 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5104 return false;
5105
5106 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5107 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5108 // callee potentially have different TOC bases then we cannot tail call since
5109 // we need to restore the TOC pointer after the call.
5110 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5111 // We cannot guarantee this for indirect calls or calls to external functions.
5112 // When PC-Relative addressing is used, the concept of the TOC is no longer
5113 // applicable so this check is not required.
5114 // Check first for indirect calls.
5115 if (!Subtarget.isUsingPCRelativeCalls() &&
5116 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5117 return false;
5118
5119 // Check if we share the TOC base.
5120 if (!Subtarget.isUsingPCRelativeCalls() &&
5121 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5122 return false;
5123
5124 // TCO allows altering callee ABI, so we don't have to check further.
5125 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5126 return true;
5127
5128 if (DisableSCO) return false;
5129
5130 // If callee use the same argument list that caller is using, then we can
5131 // apply SCO on this case. If it is not, then we need to check if callee needs
5132 // stack for passing arguments.
5133 // PC Relative tail calls may not have a CallBase.
5134 // If there is no CallBase we cannot verify if we have the same argument
5135 // list so assume that we don't have the same argument list.
5136 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5137 needStackSlotPassParameters(Subtarget, Outs))
5138 return false;
5139 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5140 return false;
5141
5142 return true;
5143}
5144
5145/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5146/// for tail call optimization. Targets which want to do tail call
5147/// optimization should implement this function.
5148bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5149 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5150 CallingConv::ID CallerCC, bool isVarArg,
5151 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5152 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5153 return false;
5154
5155 // Variable argument functions are not supported.
5156 if (isVarArg)
5157 return false;
5158
5159 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5160 // Functions containing by val parameters are not supported.
5161 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5162 return false;
5163
5164 // Non-PIC/GOT tail calls are supported.
5165 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5166 return true;
5167
5168 // At the moment we can only do local tail calls (in same module, hidden
5169 // or protected) if we are generating PIC.
5170 if (CalleeGV)
5171 return CalleeGV->hasHiddenVisibility() ||
5172 CalleeGV->hasProtectedVisibility();
5173 }
5174
5175 return false;
5176}
5177
5178/// isCallCompatibleAddress - Return the immediate to use if the specified
5179/// 32-bit value is representable in the immediate field of a BxA instruction.
// NOTE(review): original lines 5180-5181 are missing from this extraction;
// they presumably contain the function signature (taking an SDValue Op and a
// SelectionDAG &DAG) and the dyn_cast of Op to ConstantSDNode producing C --
// confirm against upstream.
5182 if (!C) return nullptr;
5183
5184 int Addr = C->getZExtValue();
5185 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// NOTE(review): original line 5186 (the second half of this condition --
// presumably the signed-immediate range check on Addr) is missing here.
5187 return nullptr; // Top 6 bits have to be sext of immediate.
5188
// Encode the word-aligned absolute address as the BxA immediate (addr >> 2).
5189 return DAG
5190 .getConstant(
5191 (int)C->getZExtValue() >> 2, SDLoc(Op),
// NOTE(review): original line 5192 (the value-type argument of getConstant)
// is missing here.
5193 .getNode();
5194}
5195
5196namespace {

// Bookkeeping for one argument that must be stored to the stack when
// lowering a tail call: the value itself plus the frame index (as both a DAG
// node and a raw index) of its destination slot.
5198struct TailCallArgumentInfo {
  // The argument value to be stored.
5199 SDValue Arg;
  // Frame-index node addressing the destination stack slot.
5200 SDValue FrameIdxOp;
  // Raw frame index number of that slot.
5201 int FrameIdx = 0;

5203 TailCallArgumentInfo() = default;
5204};

5206} // end anonymous namespace
5207
5208/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
// NOTE(review): original line 5209 is missing from this extraction; it
// presumably reads "static void StoreTailCallArgumentsToStackSlot(" --
// confirm against upstream.
5210 SelectionDAG &DAG, SDValue Chain,
5211 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5212 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
// Emit one store per recorded tail-call argument; the stores are collected
// in MemOpChains for the caller to token-factor together.
5213 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5214 SDValue Arg = TailCallArgs[i].Arg;
5215 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5216 int FI = TailCallArgs[i].FrameIdx;
5217 // Store relative to framepointer.
5218 MemOpChains.push_back(DAG.getStore(
5219 Chain, dl, Arg, FIN,
// NOTE(review): original line 5220 is missing here; it presumably supplies
// the MachinePointerInfo argument (a fixed-stack reference for FI) and
// closes the getStore call -- confirm against upstream.
5221 }
5222}
5223
5224/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5225/// the appropriate stack slot for the tail call optimized function call.
// NOTE(review): original line 5226 is missing from this extraction; it
// presumably holds the start of the signature
// ("static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
// SDValue Chain,") -- confirm against upstream.
5227 SDValue OldRetAddr, SDValue OldFP,
5228 int SPDiff, const SDLoc &dl) {
// Only needed when the tail call actually adjusts the stack pointer.
5229 if (SPDiff) {
5230 // Calculate the new stack slot for the return address.
// NOTE(review): original line 5231 is missing here; it presumably declares
// MF (the MachineFunction obtained from DAG) used below -- confirm against
// upstream.
5232 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5233 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5234 bool isPPC64 = Subtarget.isPPC64();
5235 int SlotSize = isPPC64 ? 8 : 4;
5236 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5237 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5238 NewRetAddrLoc, true);
5239 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5240 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5241 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5242 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5243 }
5244 return Chain;
5245}
5246
5247/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5248/// the position of the argument.
5249static void
// NOTE(review): original line 5250 is missing from this extraction; it
// presumably reads "CalculateTailCallArgDest(SelectionDAG &DAG,
// MachineFunction &MF, bool isPPC64," -- confirm against upstream.
5251 SDValue Arg, int SPDiff, unsigned ArgOffset,
5252 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
// The argument's final location is its normal offset shifted by the tail
// call's stack-pointer adjustment.
5253 int Offset = ArgOffset + SPDiff;
// Round the argument's bit size up to whole bytes.
5254 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5255 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5256 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5257 SDValue FIN = DAG.getFrameIndex(FI, VT);
// Record the argument and its destination slot; the actual store is emitted
// later by StoreTailCallArgumentsToStackSlot.
5258 TailCallArgumentInfo Info;
5259 Info.Arg = Arg;
5260 Info.FrameIdxOp = FIN;
5261 Info.FrameIdx = FI;
5262 TailCallArguments.push_back(Info);
5263}
5264
5265/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5266/// stack slot. Returns the chain as result and the loaded frame pointers in
5267/// LROpOut/FPOpout. Used when tail calling.
5268SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5269 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5270 SDValue &FPOpOut, const SDLoc &dl) const {
5271 if (SPDiff) {
5272 // Load the LR and FP stack slot for later adjusting.
5273 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5274 LROpOut = getReturnAddrFrameIndex(DAG);
5275 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5276 Chain = SDValue(LROpOut.getNode(), 1);
5277 }
5278 return Chain;
5279}
5280
5281/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5282/// by "Src" to address "Dst" of size "Size". Alignment information is
5283/// specified by the specific parameter attribute. The copy will be passed as
5284/// a byval function parameter.
5285/// Sometimes what we are copying is the end of a larger object, the part that
5286/// does not fit in registers.
// NOTE(review): original line 5287 is missing from this extraction; it
// presumably holds the signature start
// ("static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,") --
// confirm against upstream.
5288 SDValue Chain, ISD::ArgFlagsTy Flags,
5289 SelectionDAG &DAG, const SDLoc &dl) {
// The byval size and alignment both come from the argument's flags.
5290 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5291 return DAG.getMemcpy(
5292 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5293 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5294}
5295
5296/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5297/// tail calls.
// NOTE(review): original line 5298 is missing from this extraction; it
// presumably reads "static void LowerMemOpCallTo(" -- confirm against
// upstream.
5299 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5300 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5301 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5302 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
// NOTE(review): original line 5303 is missing here; it presumably declares
// PtrVT (the pointer EVT) used below -- confirm against upstream.
5304 if (!isTailCall) {
// Vector arguments are addressed relative to the stack pointer (r1/x1)
// rather than through the precomputed PtrOff.
5305 if (isVector) {
5306 SDValue StackPtr;
5307 if (isPPC64)
5308 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5309 else
5310 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5311 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5312 DAG.getConstant(ArgOffset, dl, PtrVT));
5313 }
5314 MemOpChains.push_back(
5315 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5316 // Calculate and remember argument location.
5317 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5318 TailCallArguments);
5319}
5320
// Finalizes a tail call: stores the recorded arguments into their stack
// slots, saves the return address into its new location, and closes the call
// sequence, updating Chain/InGlue for the tail-call node itself.
5321static void
// NOTE(review): original line 5322 is missing from this extraction; it
// presumably reads "PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue,
// SDValue &Chain," -- confirm against upstream.
5323 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5324 SDValue FPOp,
5325 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5326 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5327 // might overwrite each other in case of tail call optimization.
5328 SmallVector<SDValue, 8> MemOpChains2;
5329 // Do not flag preceding copytoreg stuff together with the following stuff.
5330 InGlue = SDValue();
5331 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5332 MemOpChains2, dl);
5333 if (!MemOpChains2.empty())
5334 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5335
5336 // Store the return address to the appropriate stack slot.
5337 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5338
5339 // Emit callseq_end just before tailcall node.
5340 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5341 InGlue = Chain.getValue(1);
5342}
5343
5344// Is this global address that of a function that can be called by name? (as
5345// opposed to something that must hold a descriptor for an indirect call).
5346static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5347 if (GV) {
5348 if (GV->isThreadLocal())
5349 return false;
5350
5351 return GV->getValueType()->isFunctionTy();
5352 }
5353
5354 return false;
5355}
5356
// Copies the physical-register results of a call back into virtual values
// (InVals), honoring the PPC return-value calling convention, SPE f64 pairs,
// and any extension promotions recorded in the CCValAssign entries.
5357SDValue PPCTargetLowering::LowerCallResult(
5358 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5359 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5360 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
// NOTE(review): original line 5361 is missing from this extraction; it
// presumably declares RVLocs (a SmallVector of CCValAssign) used below --
// confirm against upstream.
5362 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5363 *DAG.getContext());
5364
5365 CCRetInfo.AnalyzeCallResult(
5366 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
// NOTE(review): original line 5367 is missing here; it presumably selects
// the cold-calling-convention return CC ("? RetCC_PPC_Cold") -- confirm
// against upstream.
5368 : RetCC_PPC);
5369
5370 // Copy all of the result registers out of their specified physreg.
5371 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5372 CCValAssign &VA = RVLocs[i];
5373 assert(VA.isRegLoc() && "Can only return in registers!");
5374
5375 SDValue Val;
5376
// SPE returns f64 as two i32 halves in consecutive locations; rebuild the
// f64 from the Lo/Hi pair (swapped on big-endian).
5377 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5378 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5379 InGlue);
5380 Chain = Lo.getValue(1);
5381 InGlue = Lo.getValue(2);
5382 VA = RVLocs[++i]; // skip ahead to next loc
5383 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5384 InGlue);
5385 Chain = Hi.getValue(1);
5386 InGlue = Hi.getValue(2);
5387 if (!Subtarget.isLittleEndian())
5388 std::swap (Lo, Hi);
5389 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5390 } else {
5391 Val = DAG.getCopyFromReg(Chain, dl,
5392 VA.getLocReg(), VA.getLocVT(), InGlue);
5393 Chain = Val.getValue(1);
5394 InGlue = Val.getValue(2);
5395 }
5396
// Undo any promotion the calling convention applied to the return value.
5397 switch (VA.getLocInfo()) {
5398 default: llvm_unreachable("Unknown loc info!");
5399 case CCValAssign::Full: break;
5400 case CCValAssign::AExt:
5401 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5402 break;
5403 case CCValAssign::ZExt:
5404 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5405 DAG.getValueType(VA.getValVT()));
5406 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5407 break;
5408 case CCValAssign::SExt:
5409 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5410 DAG.getValueType(VA.getValVT()));
5411 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5412 break;
5413 }
5414
5415 InVals.push_back(Val);
5416 }
5417
5418 return Chain;
5419}
5420
// Determines whether a call must be lowered as an indirect call (through a
// register) rather than by symbol name or absolute address.
5421static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5422 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5423 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5424 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5425
5426 // PatchPoint calls are not indirect.
5427 if (isPatchPoint)
5428 return false;
5429
// NOTE(review): original line 5430 is missing from this extraction; it
// presumably checks for a direct callee (a function global address or an
// ExternalSymbol node) guarding this early return -- confirm against
// upstream.
5431 return false;
5432
5433 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5434 // because the immediate function pointer points to a descriptor instead of
5435 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5436 // pointer immediate points to the global entry point, while the BLA would
5437 // need to jump to the local entry point (see rL211174).
5438 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5439 isBLACompatibleAddress(Callee, DAG))
5440 return false;
5441
5442 return true;
5443}
5444
5445// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5446static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5447 return Subtarget.isAIXABI() ||
5448 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5449}
5450
// NOTE(review): original line 5451 is missing from this extraction; it
// presumably holds the start of this function's signature
// ("static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,") --
// confirm against upstream. The function selects the PPCISD call opcode for
// a call, based on tail-call-ness, indirectness, TOC requirements, and
// strict-FP rounding-mode variants.
5452 const Function &Caller, const SDValue &Callee,
5453 const PPCSubtarget &Subtarget,
5454 const TargetMachine &TM,
5455 bool IsStrictFPCall = false) {
5456 if (CFlags.IsTailCall)
5457 return PPCISD::TC_RETURN;
5458
5459 unsigned RetOpc = 0;
5460 // This is a call through a function pointer.
5461 if (CFlags.IsIndirect) {
5462 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5463 // indirect calls. The save of the caller's TOC pointer to the stack will be
5464 // inserted into the DAG as part of call lowering. The restore of the TOC
5465 // pointer is modeled by using a pseudo instruction for the call opcode that
5466 // represents the 2 instruction sequence of an indirect branch and link,
5467 // immediately followed by a load of the TOC pointer from the stack save
5468 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5469 // as it is not saved or used.
// NOTE(review): original line 5470 is missing here; it presumably selects
// the TOC-restoring opcode (a ternary on isTOCSaveRestoreRequired choosing
// the BCTRL variant that reloads the TOC) -- confirm against upstream.
5471 : PPCISD::BCTRL;
5472 } else if (Subtarget.isUsingPCRelativeCalls()) {
5473 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5474 RetOpc = PPCISD::CALL_NOTOC;
5475 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5476 // The ABIs that maintain a TOC pointer accross calls need to have a nop
5477 // immediately following the call instruction if the caller and callee may
5478 // have different TOC bases. At link time if the linker determines the calls
5479 // may not share a TOC base, the call is redirected to a trampoline inserted
5480 // by the linker. The trampoline will (among other things) save the callers
5481 // TOC pointer at an ABI designated offset in the linkage area and the
5482 // linker will rewrite the nop to be a load of the TOC pointer from the
5483 // linkage area into gpr2.
5484 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5485 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5486 RetOpc =
5487 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5488 } else
5489 RetOpc = PPCISD::CALL;
// Strict-FP calls use rounding-mode-preserving (_RM) opcode variants.
5490 if (IsStrictFPCall) {
5491 switch (RetOpc) {
5492 default:
5493 llvm_unreachable("Unknown call opcode");
// NOTE(review): original lines 5494-5495 are missing here; they presumably
// handle the TOC-restoring BCTRL variant, mapping it to its _RM counterpart
// -- confirm against upstream.
5496 break;
5497 case PPCISD::BCTRL:
5498 RetOpc = PPCISD::BCTRL_RM;
5499 break;
5500 case PPCISD::CALL_NOTOC:
5501 RetOpc = PPCISD::CALL_NOTOC_RM;
5502 break;
5503 case PPCISD::CALL:
5504 RetOpc = PPCISD::CALL_RM;
5505 break;
5506 case PPCISD::CALL_NOP:
5507 RetOpc = PPCISD::CALL_NOP_RM;
5508 break;
5509 }
5510 }
5511 return RetOpc;
5512}
5513
// Rewrites the callee operand into the node form required by the target ABI:
// an absolute BLA immediate where legal, an AIX entry-point MCSymbol, a
// target global address (with PLT flag where needed), or a target external
// symbol; otherwise the callee is returned unchanged.
// NOTE(review): several original lines are missing from this extraction
// (5522, 5526, 5535, 5541, 5543, 5560, 5565, 5578 -- mostly lines that
// referenced linked symbols); individual notes below flag each gap. Confirm
// all of them against upstream before editing.
5514static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5515 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5516 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5517 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5518 return SDValue(Dest, 0);
5519
5520 // Returns true if the callee is local, and false otherwise.
5521 auto isLocalCallee = [&]() {
// NOTE(review): original line 5522 is missing here; it presumably performs
// the GlobalAddressSDNode dyn_cast of Callee producing G.
5523 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5524
5525 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
// NOTE(review): original line 5526 (the second conjunct, presumably
// excluding ifunc globals) is missing here.
5527 };
5528
5529 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5530 // a static relocation model causes some versions of GNU LD (2.17.50, at
5531 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5532 // built with secure-PLT.
5533 bool UsePlt =
5534 Subtarget.is32BitELFABI() && !isLocalCallee() &&
// NOTE(review): original line 5535 (the final conjunct, presumably checking
// for the PIC relocation model) is missing here.
5536
5537 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5538 const TargetMachine &TM = Subtarget.getTargetMachine();
5539 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5540 MCSymbolXCOFF *S =
// NOTE(review): original line 5541 (the cast of TLOF's function entry point
// symbol for GV) is missing here.
5542
// NOTE(review): original line 5543 (presumably the PtrVT declaration used
// below) is missing here.
5544 return DAG.getMCSymbol(S, PtrVT);
5545 };
5546
5547 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5548 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5549 if (isFunctionGlobalAddress(GV)) {
5550 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5551
5552 if (Subtarget.isAIXABI()) {
5553 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5554 return getAIXFuncEntryPointSymbolSDNode(GV);
5555 }
5556 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5557 UsePlt ? PPCII::MO_PLT : 0);
5558 }
5559
// NOTE(review): original line 5560 is missing here; it presumably opens an
// "if" binding S from an ExternalSymbolSDNode dyn_cast of Callee, enclosing
// the block below.
5561 const char *SymName = S->getSymbol();
5562 if (Subtarget.isAIXABI()) {
5563 // If there exists a user-declared function whose name is the same as the
5564 // ExternalSymbol's, then we pick up the user-declared version.
// NOTE(review): original line 5565 is missing here; it presumably fetches
// Mod (the current Module) used in the lookup below.
5566 if (const Function *F =
5567 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5568 return getAIXFuncEntryPointSymbolSDNode(F);
5569
5570 // On AIX, direct function calls reference the symbol for the function's
5571 // entry point, which is named by prepending a "." before the function's
5572 // C-linkage name. A Qualname is returned here because an external
5573 // function entry point is a csect with XTY_ER property.
5574 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5575 auto &Context = DAG.getMachineFunction().getContext();
5576 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5577 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
// NOTE(review): original line 5578 (the remaining XCOFF csect arguments) is
// missing here.
5579 return Sec->getQualNameSymbol();
5580 };
5581
5582 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5583 }
5584 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5585 UsePlt ? PPCII::MO_PLT : 0);
5586 }
5587
5588 // No transformation needed.
5589 assert(Callee.getNode() && "What no callee?");
5590 return Callee;
5591}
5592
5594 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5595 "Expected a CALLSEQ_STARTSDNode.");
5596
5597 // The last operand is the chain, except when the node has glue. If the node
5598 // has glue, then the last operand is the glue, and the chain is the second
5599 // last operand.
5600 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5601 if (LastValue.getValueType() != MVT::Glue)
5602 return LastValue;
5603
5604 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5605}
5606
5607// Creates the node that moves a functions address into the count register
5608// to prepare for an indirect call instruction.
5609static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5610 SDValue &Glue, SDValue &Chain,
5611 const SDLoc &dl) {
5612 SDValue MTCTROps[] = {Chain, Callee, Glue};
5613 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5614 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5615 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5616 // The glue is the second value produced.
5617 Glue = Chain.getValue(1);
5618}
5619
5621                                          SDValue &Glue, SDValue &Chain,
5622                                          SDValue CallSeqStart,
5623                                          const CallBase *CB, const SDLoc &dl,
5624                                          bool hasNest,
5625                                          const PPCSubtarget &Subtarget) {
5626   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5627   // entry point, but to the function descriptor (the function entry point
5628   // address is part of the function descriptor though).
5629   // The function descriptor is a three doubleword structure with the
5630   // following fields: function entry point, TOC base address and
5631   // environment pointer.
5632   // Thus for a call through a function pointer, the following actions need
5633   // to be performed:
5634   //   1. Save the TOC of the caller in the TOC save area of its stack
5635   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5636   //   2. Load the address of the function entry point from the function
5637   //      descriptor.
5638   //   3. Load the TOC of the callee from the function descriptor into r2.
5639   //   4. Load the environment pointer from the function descriptor into
5640   //      r11.
5641   //   5. Branch to the function entry point address.
5642   //   6. On return of the callee, the TOC of the caller needs to be
5643   //      restored (this is done in FinishCall()).
5644   //
5645   // The loads are scheduled at the beginning of the call sequence, and the
5646   // register copies are flagged together to ensure that no other
5647   // operations can be scheduled in between. E.g. without flagging the
5648   // copies together, a TOC access in the caller could be scheduled between
5649   // the assignment of the callee TOC and the branch to the callee, which leads
5650   // to incorrect code.
5651
5652   // Start by loading the function address from the descriptor.
5653   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
   // The descriptor loads are chained to the CALLSEQ_START output chain so
   // they can be scheduled at the very start of the call sequence.
5654   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5658
   // Pointer info for the descriptor accesses; an unknown location when there
   // is no call-site (CB) to derive it from.
5659   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5660
5661   // Registers used in building the DAG.
5662   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5663   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5664
5665   // Offsets of descriptor members.
5666   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5667   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5668
   // Each descriptor slot is pointer-sized: 8 bytes on PPC64, else 4.
5669   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5670   const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5671
5672   // One load for the functions entry point address.
5673   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5674                                     Alignment, MMOFlags);
5675
5676   // One for loading the TOC anchor for the module that contains the called
5677   // function.
5678   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5679   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5680   SDValue TOCPtr =
5681       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5682                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5683
5684   // One for loading the environment pointer.
5685   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5686   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5687   SDValue LoadEnvPtr =
5688       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5689                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5690
5691
5692   // Then copy the newly loaded TOC anchor to the TOC pointer.
   // The copies below are glued together (and, via Glue, to the final MTCTR)
   // so nothing can be scheduled between setting the callee TOC and branching.
5693   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5694   Chain = TOCVal.getValue(0);
5695   Glue = TOCVal.getValue(1);
5696
5697   // If the function call has an explicit 'nest' parameter, it takes the
5698   // place of the environment pointer.
5699   assert((!hasNest || !Subtarget.isAIXABI()) &&
5700          "Nest parameter is not supported on AIX.");
5701   if (!hasNest) {
5702     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5703     Chain = EnvVal.getValue(0);
5704     Glue = EnvVal.getValue(1);
5705   }
5706
5707   // The rest of the indirect call sequence is the same as the non-descriptor
5708   // DAG.
5709   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5710}
5711
// Build the operand list (Ops) for the final call node. The operand order is
// significant: chain first; then the callee (direct calls) or, for indirect
// calls, the TOC-restore address / environment-pointer register / CTR; the
// tail-call stack-pointer delta; the argument registers; implicit register
// uses; the register mask; and, when valid, the glue operand last.
5712 static void
5714                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5715                   SelectionDAG &DAG,
5716                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5717                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5718                   const PPCSubtarget &Subtarget) {
5719   const bool IsPPC64 = Subtarget.isPPC64();
5720   // MVT for a general purpose register.
5721   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5722
5723   // First operand is always the chain.
5724   Ops.push_back(Chain);
5725
5726   // If it's a direct call pass the callee as the second operand.
5727   if (!CFlags.IsIndirect)
5728     Ops.push_back(Callee);
5729   else {
5730     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5731
5732     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5733     // on the stack (this would have been done in `LowerCall_64SVR4` or
5734     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5735     // represents both the indirect branch and a load that restores the TOC
5736     // pointer from the linkage area. The operand for the TOC restore is an add
5737     // of the TOC save offset to the stack pointer. This must be the second
5738     // operand: after the chain input but before any other variadic arguments.
5739     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5740     // saved or used.
5741     if (isTOCSaveRestoreRequired(Subtarget)) {
5742       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5743
5744       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5745       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5746       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5747       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5748       Ops.push_back(AddTOC);
5749     }
5750
5751     // Add the register used for the environment pointer.
5752     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5754                                     RegVT));
5755
5756
5757     // Add CTR register as callee so a bctr can be emitted later.
5758     if (CFlags.IsTailCall)
5759       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5760   }
5761
5762   // If this is a tail call add stack pointer delta.
5763   if (CFlags.IsTailCall)
5764     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5765
5766   // Add argument registers to the end of the list so that they are known live
5767   // into the call.
5768   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5769     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5770                                   RegsToPass[i].second.getValueType()));
5771
5772   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5773   // no way to mark dependencies as implicit here.
5774   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5775   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5776       !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5777     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5778
5779   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5780   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5781     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5782
5783   // Add a register mask operand representing the call-preserved registers.
5784   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5785   const uint32_t *Mask =
5786       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5787   assert(Mask && "Missing call preserved mask for calling convention");
5788   Ops.push_back(DAG.getRegisterMask(Mask));
5789
5790   // If the glue is valid, it is the last operand.
5791   if (Glue.getNode())
5792     Ops.push_back(Glue);
5793}
5794
// Final stage of call lowering, shared by all ABIs: resolves the callee
// (direct-call transformation or indirect-call preparation), builds the call
// node's operand list, then emits either a TC_RETURN tail call or a normal
// call node followed by CALLSEQ_END, and lowers the returned values into
// InVals.
5795 SDValue PPCTargetLowering::FinishCall(
5796     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5797     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5798     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5799     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5800     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5801
   // TOC-based ABIs (64-bit ELF without PC-relative addressing, and AIX)
   // need the TOC base pointer marked as used for this function.
5802   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5803       Subtarget.isAIXABI())
5804     setUsesTOCBasePtr(DAG);
5805
5806   unsigned CallOpc =
5807       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5808                     Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5809
   // Direct calls rewrite the callee node; indirect calls set up CTR (and,
   // for descriptor ABIs, the TOC/environment loads) instead.
5810   if (!CFlags.IsIndirect)
5811     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5812   else if (Subtarget.usesFunctionDescriptors())
5813     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5814                                   dl, CFlags.HasNest, Subtarget);
5815   else
5816     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5817
5818   // Build the operand list for the call instruction.
5820   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5821                     SPDiff, Subtarget);
5822
5823   // Emit tail call.
5824   if (CFlags.IsTailCall) {
5825     // Indirect tail call when using PC Relative calls do not have the same
5826     // constraints.
5827     assert(((Callee.getOpcode() == ISD::Register &&
5828              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5829             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5830             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5831             isa<ConstantSDNode>(Callee) ||
5832             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5833            "Expecting a global address, external symbol, absolute value, "
5834            "register or an indirect tail call when PC Relative calls are "
5835            "used.");
5836     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5837     assert(CallOpc == PPCISD::TC_RETURN &&
5838            "Unexpected call opcode for a tail call.");
5840     SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5841     DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5842     return Ret;
5843   }
5844
   // Regular call: the node produces (chain, glue) consumed by CALLSEQ_END
   // and the result copies below.
5845   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5846   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5847   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5848   Glue = Chain.getValue(1);
5849
5850   // When performing tail call optimization the callee pops its arguments off
5851   // the stack. Account for this here so these bytes can be pushed back on in
5852   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5853   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5855                             ? NumBytes
5856                             : 0;
5857
5858   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5859   Glue = Chain.getValue(1);
5860
5861   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5862                          DAG, InVals);
5863}
5864
5866 CallingConv::ID CalleeCC = CB->getCallingConv();
5867 const Function *CallerFunc = CB->getCaller();
5868 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5869 const Function *CalleeFunc = CB->getCalledFunction();
5870 if (!CalleeFunc)
5871 return false;
5872 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5873
5876
5877 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5878 CalleeFunc->getAttributes(), Outs, *this,
5879 CalleeFunc->getDataLayout());
5880
5881 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5882 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5883 false /*isCalleeExternalSymbol*/);
5884}
5885
5886bool PPCTargetLowering::isEligibleForTCO(
5887 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5888 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5890 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5891 bool isCalleeExternalSymbol) const {
5892 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5893 return false;
5894
5895 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5896 return IsEligibleForTailCallOptimization_64SVR4(
5897 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5898 isCalleeExternalSymbol);
5899 else
5900 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5901 isVarArg, Ins);
5902}
5903
// Common entry point for lowering outgoing calls. Decides tail-call
// eligibility (updating the TCO statistics), honors -mlongcall by turning a
// named callee into a pointer, assembles the CallFlags, and dispatches to
// the ABI-specific LowerCall_* routine.
5904 SDValue
5905 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5906                              SmallVectorImpl<SDValue> &InVals) const {
5907   SelectionDAG &DAG = CLI.DAG;
5908   SDLoc &dl = CLI.DL;
5910   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5912   SDValue Chain = CLI.Chain;
5913   SDValue Callee = CLI.Callee;
5914   bool &isTailCall = CLI.IsTailCall;
5915   CallingConv::ID CallConv = CLI.CallConv;
5916   bool isVarArg = CLI.IsVarArg;
5917   bool isPatchPoint = CLI.IsPatchPoint;
5918   const CallBase *CB = CLI.CB;
5919
5920   if (isTailCall) {
5922     CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5923     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5924     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5925     bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5926
   // Downgrade the request to a normal call if this site is not eligible.
5927     isTailCall =
5928         isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5929                          &(MF.getFunction()), IsCalleeExternalSymbol);
5930     if (isTailCall) {
5931       ++NumTailCalls;
5932       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5933         ++NumSiblingCalls;
5934
5935       // PC Relative calls no longer guarantee that the callee is a Global
5936       // Address Node. The callee could be an indirect tail call in which
5937       // case the SDValue for the callee could be a load (to load the address
5938       // of a function pointer) or it may be a register copy (to move the
5939       // address of the callee from a function parameter into a virtual
5940       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5941       assert((Subtarget.isUsingPCRelativeCalls() ||
5942               isa<GlobalAddressSDNode>(Callee)) &&
5943              "Callee should be an llvm::Function object.");
5944
5945       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5946                         << "\nTCO callee: ");
5947       LLVM_DEBUG(Callee.dump());
5948     }
5949   }
5950
5951   if (!isTailCall && CB && CB->isMustTailCall())
5952     report_fatal_error("failed to perform tail call elimination on a call "
5953                        "site marked musttail");
5954
5955   // When long calls (i.e. indirect calls) are always used, calls are always
5956   // made via function pointer. If we have a function name, first translate it
5957   // into a pointer.
5958   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5959       !isTailCall)
5960     Callee = LowerGlobalAddress(Callee, DAG);
5961
5962   CallFlags CFlags(
5963       CallConv, isTailCall, isVarArg, isPatchPoint,
5964       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5965       // hasNest
5966       Subtarget.is64BitELFABI() &&
5967           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5968       CLI.NoMerge);
5969
   // Dispatch to the ABI-specific lowering.
5970   if (Subtarget.isAIXABI())
5971     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5972                          InVals, CB);
5973
5974   assert(Subtarget.isSVR4ABI());
5975   if (Subtarget.isPPC64())
5976     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977                             InVals, CB);
5978   return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5979                           InVals, CB);
5980}
5981
// Lower an outgoing call for the 32-bit SVR4 (ELF) ABI: assign argument
// locations, materialize by-value aggregate copies outside the call
// sequence, place arguments into registers or the parameter area, set CR6
// for vararg calls with FP args, and hand off to FinishCall().
5982 SDValue PPCTargetLowering::LowerCall_32SVR4(
5983     SDValue Chain, SDValue Callee, CallFlags CFlags,
5985     const SmallVectorImpl<SDValue> &OutVals,
5986     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5988     const CallBase *CB) const {
5989   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5990   // of the 32-bit SVR4 ABI stack frame layout.
5991
5992   const CallingConv::ID CallConv = CFlags.CallConv;
5993   const bool IsVarArg = CFlags.IsVarArg;
5994   const bool IsTailCall = CFlags.IsTailCall;
5995
5996   assert((CallConv == CallingConv::C ||
5997           CallConv == CallingConv::Cold ||
5998           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5999
   // Pointers are 4 bytes on 32-bit SVR4.
6000   const Align PtrAlign(4);
6001
6003
6004   // Mark this function as potentially containing a function that contains a
6005   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6006   // and restoring the callers stack pointer in this functions epilog. This is
6007   // done because by tail calling the called function might overwrite the value
6008   // in this function's (MF) stack pointer stack slot 0(SP).
6009   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6010       CallConv == CallingConv::Fast)
6011     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6012
6013   // Count how many bytes are to be pushed on the stack, including the linkage
6014   // area, parameter list area and the part of the local variable space which
6015   // contains copies of aggregates which are passed by value.
6016
6017   // Assign locations to all of the outgoing arguments.
6019   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6020
6021   // Reserve space for the linkage area on the stack.
6022   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6023                        PtrAlign);
6024   if (useSoftFloat())
6025     CCInfo.PreAnalyzeCallOperands(Outs);
6026
6027   if (IsVarArg) {
6028     // Handle fixed and variable vector arguments differently.
6029     // Fixed vector arguments go into registers as long as registers are
6030     // available. Variable vector arguments always go into memory.
6031     unsigned NumArgs = Outs.size();
6032
6033     for (unsigned i = 0; i != NumArgs; ++i) {
6034       MVT ArgVT = Outs[i].VT;
6035       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6036       bool Result;
6037
6038       if (Outs[i].IsFixed) {
6039         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6040                                CCInfo);
6041       } else {
6043                                       ArgFlags, CCInfo);
6044       }
6045
   // A true result from the CC function means the argument was NOT handled.
6046       if (Result) {
6047#ifndef NDEBUG
6048         errs() << "Call operand #" << i << " has unhandled type "
6049                << ArgVT << "\n";
6050#endif
6051         llvm_unreachable(nullptr);
6052       }
6053     }
6054   } else {
6055     // All arguments are treated the same.
6056     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6057   }
6058   CCInfo.clearWasPPCF128();
6059
6060   // Assign locations to all of the outgoing aggregate by value arguments.
6061   SmallVector<CCValAssign, 16> ByValArgLocs;
6062   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6063
6064   // Reserve stack space for the allocations in CCInfo.
6065   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6066
6067   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6068
6069   // Size of the linkage area, parameter list area and the part of the local
6070   // space variable where copies of aggregates which are passed by value are
6071   // stored.
6072   unsigned NumBytes = CCByValInfo.getStackSize();
6073
6074   // Calculate by how many bytes the stack has to be adjusted in case of tail
6075   // call optimization.
6076   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6077
6078   // Adjust the stack pointer for the new arguments...
6079   // These operations are automatically eliminated by the prolog/epilog pass
6080   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6081   SDValue CallSeqStart = Chain;
6082
6083   // Load the return address and frame pointer so it can be moved somewhere else
6084   // later.
6085   SDValue LROp, FPOp;
6086   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6087
6088   // Set up a copy of the stack pointer for use loading and storing any
6089   // arguments that may not fit in the registers available for argument
6090   // passing.
6091   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6092
6094   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6095   SmallVector<SDValue, 8> MemOpChains;
6096
6097   bool seenFloatArg = false;
6098   // Walk the register/memloc assignments, inserting copies/loads.
6099   // i - Tracks the index into the list of registers allocated for the call
6100   // RealArgIdx - Tracks the index into the list of actual function arguments
6101   // j - Tracks the index into the list of byval arguments
6102   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6103        i != e;
6104        ++i, ++RealArgIdx) {
6105     CCValAssign &VA = ArgLocs[i];
6106     SDValue Arg = OutVals[RealArgIdx];
6107     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6108
6109     if (Flags.isByVal()) {
6110       // Argument is an aggregate which is passed by value, thus we need to
6111       // create a copy of it in the local variable space of the current stack
6112       // frame (which is the stack frame of the caller) and pass the address of
6113       // this copy to the callee.
6114       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6115       CCValAssign &ByValVA = ByValArgLocs[j++];
6116       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6117
6118       // Memory reserved in the local variable space of the callers stack frame.
6119       unsigned LocMemOffset = ByValVA.getLocMemOffset();
6120
6121       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6122       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6123                            StackPtr, PtrOff);
6124
6125       // Create a copy of the argument in the local area of the current
6126       // stack frame.
6127       SDValue MemcpyCall =
6128         CreateCopyOfByValArgument(Arg, PtrOff,
6129                                   CallSeqStart.getNode()->getOperand(0),
6130                                   Flags, DAG, dl);
6131
6132       // This must go outside the CALLSEQ_START..END.
6133       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6134                                                      SDLoc(MemcpyCall));
6135       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6136                              NewCallSeqStart.getNode());
6137       Chain = CallSeqStart = NewCallSeqStart;
6138
6139       // Pass the address of the aggregate copy on the stack either in a
6140       // physical register or in the parameter list area of the current stack
6141       // frame to the callee.
6142       Arg = PtrOff;
6143     }
6144
6145     // When useCRBits() is true, there can be i1 arguments.
6146     // It is because getRegisterType(MVT::i1) => MVT::i1,
6147     // and for other integer types getRegisterType() => MVT::i32.
6148     // Extend i1 and ensure callee will get i32.
6149     if (Arg.getValueType() == MVT::i1)
6150       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6151                         dl, MVT::i32, Arg);
6152
6153     if (VA.isRegLoc()) {
6154       seenFloatArg |= VA.getLocVT().isFloatingPoint();
6155       // Put argument in a physical register.
   // With SPE, an f64 is split into two i32 halves passed in consecutive
   // GPRs; which half goes first depends on endianness.
6156       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6157         bool IsLE = Subtarget.isLittleEndian();
6158         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6159                                    DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6160         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6161         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6162                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6163         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6164                                             SVal.getValue(0)));
6165       } else
6166         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6167     } else {
6168       // Put argument in the parameter list area of the current stack frame.
6169       assert(VA.isMemLoc());
6170       unsigned LocMemOffset = VA.getLocMemOffset();
6171
6172       if (!IsTailCall) {
6173         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6174         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6175                              StackPtr, PtrOff);
6176
6177         MemOpChains.push_back(
6178             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6179       } else {
6180         // Calculate and remember argument location.
6181         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6182                                  TailCallArguments);
6183       }
6184     }
6185   }
6186
6187   if (!MemOpChains.empty())
6188     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6189
6190   // Build a sequence of copy-to-reg nodes chained together with token chain
6191   // and flag operands which copy the outgoing args into the appropriate regs.
6192   SDValue InGlue;
6193   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6194     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6195                              RegsToPass[i].second, InGlue);
6196     InGlue = Chain.getValue(1);
6197   }
6198
6199   // Set CR bit 6 to true if this is a vararg call with floating args passed in
6200   // registers.
6201   if (IsVarArg) {
6202     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6203     SDValue Ops[] = { Chain, InGlue };
6204
6205     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6206                         VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6207
6208     InGlue = Chain.getValue(1);
6209   }
6210
6211   if (IsTailCall)
6212     PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6213                     TailCallArguments);
6214
6215   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6216                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6217}
6218
6219// Copy an argument into memory, being careful to do this outside the
6220// call sequence for the call to which the argument belongs.
6221SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6222 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6223 SelectionDAG &DAG, const SDLoc &dl) const {
6224 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6225 CallSeqStart.getNode()->getOperand(0),
6226 Flags, DAG, dl);
6227 // The MEMCPY must go outside the CALLSEQ_START..END.
6228 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6229 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6230 SDLoc(MemcpyCall));
6231 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6232 NewCallSeqStart.getNode());
6233 return NewCallSeqStart;
6234}
6235
6236SDValue PPCTargetLowering::LowerCall_64SVR4(
6237 SDValue Chain, SDValue Callee, CallFlags CFlags,
6239 const SmallVectorImpl<SDValue> &OutVals,
6240 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6242 const CallBase *CB) const {
6243 bool isELFv2ABI = Subtarget.isELFv2ABI();
6244 bool isLittleEndian = Subtarget.isLittleEndian();
6245 unsigned NumOps = Outs.size();
6246 bool IsSibCall = false;
6247 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6248
6249 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6250 unsigned PtrByteSize = 8;
6251
6253
6254 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6255 IsSibCall = true;
6256
6257 // Mark this function as potentially containing a function that contains a
6258 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6259 // and restoring the callers stack pointer in this functions epilog. This is
6260 // done because by tail calling the called function might overwrite the value
6261 // in this function's (MF) stack pointer stack slot 0(SP).
6262 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6263 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6264
6265 assert(!(IsFastCall && CFlags.IsVarArg) &&
6266 "fastcc not supported on varargs functions");
6267
6268 // Count how many bytes are to be pushed on the stack, including the linkage
6269 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6270 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6271 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6272 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6273 unsigned NumBytes = LinkageSize;
6274 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6275
6276 static const MCPhysReg GPR[] = {
6277 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6278 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6279 };
6280 static const MCPhysReg VR[] = {
6281 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6282 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6283 };
6284
6285 const unsigned NumGPRs = std::size(GPR);
6286 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6287 const unsigned NumVRs = std::size(VR);
6288
6289 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6290 // can be passed to the callee in registers.
6291 // For the fast calling convention, there is another check below.
6292 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6293 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6294 if (!HasParameterArea) {
6295 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6296 unsigned AvailableFPRs = NumFPRs;
6297 unsigned AvailableVRs = NumVRs;
6298 unsigned NumBytesTmp = NumBytes;
6299 for (unsigned i = 0; i != NumOps; ++i) {
6300 if (Outs[i].Flags.isNest()) continue;
6301 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6302 PtrByteSize, LinkageSize, ParamAreaSize,
6303 NumBytesTmp, AvailableFPRs, AvailableVRs))
6304 HasParameterArea = true;
6305 }
6306 }
6307
6308 // When using the fast calling convention, we don't provide backing for
6309 // arguments that will be in registers.
6310 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6311
6312 // Avoid allocating parameter area for fastcc functions if all the arguments
6313 // can be passed in the registers.
6314 if (IsFastCall)
6315 HasParameterArea = false;
6316
6317 // Add up all the space actually used.
6318 for (unsigned i = 0; i != NumOps; ++i) {
6319 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6320 EVT ArgVT = Outs[i].VT;
6321 EVT OrigVT = Outs[i].ArgVT;
6322
6323 if (Flags.isNest())
6324 continue;
6325
6326 if (IsFastCall) {
6327 if (Flags.isByVal()) {
6328 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6329 if (NumGPRsUsed > NumGPRs)
6330 HasParameterArea = true;
6331 } else {
6332 switch (ArgVT.getSimpleVT().SimpleTy) {
6333 default: llvm_unreachable("Unexpected ValueType for argument!");
6334 case MVT::i1:
6335 case MVT::i32:
6336 case MVT::i64:
6337 if (++NumGPRsUsed <= NumGPRs)
6338 continue;
6339 break;
6340 case MVT::v4i32:
6341 case MVT::v8i16:
6342 case MVT::v16i8:
6343 case MVT::v2f64:
6344 case MVT::v2i64:
6345 case MVT::v1i128:
6346 case MVT::f128:
6347 if (++NumVRsUsed <= NumVRs)
6348 continue;
6349 break;
6350 case MVT::v4f32:
6351 if (++NumVRsUsed <= NumVRs)
6352 continue;
6353 break;
6354 case MVT::f32:
6355 case MVT::f64:
6356 if (++NumFPRsUsed <= NumFPRs)
6357 continue;
6358 break;
6359 }
6360 HasParameterArea = true;
6361 }
6362 }
6363
6364 /* Respect alignment of argument on the stack. */
6365 auto Alignement =
6366 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6367 NumBytes = alignTo(NumBytes, Alignement);
6368
6369 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6370 if (Flags.isInConsecutiveRegsLast())
6371 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6372 }
6373
6374 unsigned NumBytesActuallyUsed = NumBytes;
6375
6376 // In the old ELFv1 ABI,
6377 // the prolog code of the callee may store up to 8 GPR argument registers to
6378 // the stack, allowing va_start to index over them in memory if its varargs.
6379 // Because we cannot tell if this is needed on the caller side, we have to
6380 // conservatively assume that it is needed. As such, make sure we have at
6381 // least enough stack space for the caller to store the 8 GPRs.
6382 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6383 // really requires memory operands, e.g. a vararg function.
6384 if (HasParameterArea)
6385 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6386 else
6387 NumBytes = LinkageSize;
6388
6389 // Tail call needs the stack to be aligned.
6390 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6391 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6392
6393 int SPDiff = 0;
6394
6395 // Calculate by how many bytes the stack has to be adjusted in case of tail
6396 // call optimization.
6397 if (!IsSibCall)
6398 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6399
6400 // To protect arguments on the stack from being clobbered in a tail call,
6401 // force all the loads to happen before doing any other lowering.
6402 if (CFlags.IsTailCall)
6403 Chain = DAG.getStackArgumentTokenFactor(Chain);
6404
6405 // Adjust the stack pointer for the new arguments...
6406 // These operations are automatically eliminated by the prolog/epilog pass
6407 if (!IsSibCall)
6408 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6409 SDValue CallSeqStart = Chain;
6410
6411 // Load the return address and frame pointer so it can be move somewhere else
6412 // later.
6413 SDValue LROp, FPOp;
6414 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6415
6416 // Set up a copy of the stack pointer for use loading and storing any
6417 // arguments that may not fit in the registers available for argument
6418 // passing.
6419 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6420
6421 // Figure out which arguments are going to go in registers, and which in
6422 // memory. Also, if this is a vararg function, floating point operations
6423 // must be stored to our stack, and loaded into integer regs as well, if
6424 // any integer regs are available for argument passing.
6425 unsigned ArgOffset = LinkageSize;
6426
6428 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6429
6430 SmallVector<SDValue, 8> MemOpChains;
6431 for (unsigned i = 0; i != NumOps; ++i) {
6432 SDValue Arg = OutVals[i];
6433 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6434 EVT ArgVT = Outs[i].VT;
6435 EVT OrigVT = Outs[i].ArgVT;
6436
6437 // PtrOff will be used to store the current argument to the stack if a
6438 // register cannot be found for it.
6439 SDValue PtrOff;
6440
6441 // We re-align the argument offset for each argument, except when using the
6442 // fast calling convention, when we need to make sure we do that only when
6443 // we'll actually use a stack slot.
6444 auto ComputePtrOff = [&]() {
6445 /* Respect alignment of argument on the stack. */
6446 auto Alignment =
6447 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6448 ArgOffset = alignTo(ArgOffset, Alignment);
6449
6450 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6451
6452 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6453 };
6454
6455 if (!IsFastCall) {
6456 ComputePtrOff();
6457
6458 /* Compute GPR index associated with argument offset. */
6459 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6460 GPR_idx = std::min(GPR_idx, NumGPRs);
6461 }
6462
6463 // Promote integers to 64-bit values.
6464 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6465 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6466 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6467 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6468 }
6469
6470 // FIXME memcpy is used way more than necessary. Correctness first.
6471 // Note: "by value" is code for passing a structure by value, not
6472 // basic types.
6473 if (Flags.isByVal()) {
6474 // Note: Size includes alignment padding, so
6475 // struct x { short a; char b; }
6476 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6477 // These are the proper values we need for right-justifying the
6478 // aggregate in a parameter register.
6479 unsigned Size = Flags.getByValSize();
6480
6481 // An empty aggregate parameter takes up no storage and no
6482 // registers.
6483 if (Size == 0)
6484 continue;
6485
6486 if (IsFastCall)
6487 ComputePtrOff();
6488
6489 // All aggregates smaller than 8 bytes must be passed right-justified.
6490 if (Size==1 || Size==2 || Size==4) {
6491 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6492 if (GPR_idx != NumGPRs) {
6493 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6494 MachinePointerInfo(), VT);
6495 MemOpChains.push_back(Load.getValue(1));
6496 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6497
6498 ArgOffset += PtrByteSize;
6499 continue;
6500 }
6501 }
6502
6503 if (GPR_idx == NumGPRs && Size < 8) {
6504 SDValue AddPtr = PtrOff;
6505 if (!isLittleEndian) {
6506 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6507 PtrOff.getValueType());
6508 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6509 }
6510 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6511 CallSeqStart,
6512 Flags, DAG, dl);
6513 ArgOffset += PtrByteSize;
6514 continue;
6515 }
6516 // Copy the object to parameter save area if it can not be entirely passed
6517 // by registers.
6518 // FIXME: we only need to copy the parts which need to be passed in
6519 // parameter save area. For the parts passed by registers, we don't need
6520 // to copy them to the stack although we need to allocate space for them
6521 // in parameter save area.
6522 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6523 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6524 CallSeqStart,
6525 Flags, DAG, dl);
6526
6527 // When a register is available, pass a small aggregate right-justified.
6528 if (Size < 8 && GPR_idx != NumGPRs) {
6529 // The easiest way to get this right-justified in a register
6530 // is to copy the structure into the rightmost portion of a
6531 // local variable slot, then load the whole slot into the
6532 // register.
6533 // FIXME: The memcpy seems to produce pretty awful code for
6534 // small aggregates, particularly for packed ones.
6535 // FIXME: It would be preferable to use the slot in the
6536 // parameter save area instead of a new local variable.
6537 SDValue AddPtr = PtrOff;
6538 if (!isLittleEndian) {
6539 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6540 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6541 }
6542 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6543 CallSeqStart,
6544 Flags, DAG, dl);
6545
6546 // Load the slot into the register.
6547 SDValue Load =
6548 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6549 MemOpChains.push_back(Load.getValue(1));
6550 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6551
6552 // Done with this argument.
6553 ArgOffset += PtrByteSize;
6554 continue;
6555 }
6556
6557 // For aggregates larger than PtrByteSize, copy the pieces of the
6558 // object that fit into registers from the parameter save area.
6559 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6560 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6561 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6562 if (GPR_idx != NumGPRs) {
6563 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6564 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6565 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6566 MachinePointerInfo(), ObjType);
6567
6568 MemOpChains.push_back(Load.getValue(1));
6569 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6570 ArgOffset += PtrByteSize;
6571 } else {
6572 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6573 break;
6574 }
6575 }
6576 continue;
6577 }
6578
6579 switch (Arg.getSimpleValueType().SimpleTy) {
6580 default: llvm_unreachable("Unexpected ValueType for argument!");
6581 case MVT::i1:
6582 case MVT::i32:
6583 case MVT::i64:
6584 if (Flags.isNest()) {
6585 // The 'nest' parameter, if any, is passed in R11.
6586 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6587 break;
6588 }
6589
6590 // These can be scalar arguments or elements of an integer array type
6591 // passed directly. Clang may use those instead of "byval" aggregate
6592 // types to avoid forcing arguments to memory unnecessarily.
6593 if (GPR_idx != NumGPRs) {
6594 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6595 } else {
6596 if (IsFastCall)
6597 ComputePtrOff();
6598
6599 assert(HasParameterArea &&
6600 "Parameter area must exist to pass an argument in memory.");
6601 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6602 true, CFlags.IsTailCall, false, MemOpChains,
6603 TailCallArguments, dl);
6604 if (IsFastCall)
6605 ArgOffset += PtrByteSize;
6606 }
6607 if (!IsFastCall)
6608 ArgOffset += PtrByteSize;
6609 break;
6610 case MVT::f32:
6611 case MVT::f64: {
6612 // These can be scalar arguments or elements of a float array type
6613 // passed directly. The latter are used to implement ELFv2 homogenous
6614 // float aggregates.
6615
6616 // Named arguments go into FPRs first, and once they overflow, the
6617 // remaining arguments go into GPRs and then the parameter save area.
6618 // Unnamed arguments for vararg functions always go to GPRs and
6619 // then the parameter save area. For now, put all arguments to vararg
6620 // routines always in both locations (FPR *and* GPR or stack slot).
6621 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6622 bool NeededLoad = false;
6623
6624 // First load the argument into the next available FPR.
6625 if (FPR_idx != NumFPRs)
6626 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6627
6628 // Next, load the argument into GPR or stack slot if needed.
6629 if (!NeedGPROrStack)
6630 ;
6631 else if (GPR_idx != NumGPRs && !IsFastCall) {
6632 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6633 // once we support fp <-> gpr moves.
6634
6635 // In the non-vararg case, this can only ever happen in the
6636 // presence of f32 array types, since otherwise we never run
6637 // out of FPRs before running out of GPRs.
6638 SDValue ArgVal;
6639
6640 // Double values are always passed in a single GPR.
6641 if (Arg.getValueType() != MVT::f32) {
6642 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6643
6644 // Non-array float values are extended and passed in a GPR.
6645 } else if (!Flags.isInConsecutiveRegs()) {
6646 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6647 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6648
6649 // If we have an array of floats, we collect every odd element
6650 // together with its predecessor into one GPR.
6651 } else if (ArgOffset % PtrByteSize != 0) {
6652 SDValue Lo, Hi;
6653 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6654 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6655 if (!isLittleEndian)
6656 std::swap(Lo, Hi);
6657 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6658
6659 // The final element, if even, goes into the first half of a GPR.
6660 } else if (Flags.isInConsecutiveRegsLast()) {
6661 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6662 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6663 if (!isLittleEndian)
6664 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6665 DAG.getConstant(32, dl, MVT::i32));
6666
        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
6669 } else
6670 ArgVal = SDValue();
6671
6672 if (ArgVal.getNode())
6673 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6674 } else {
6675 if (IsFastCall)
6676 ComputePtrOff();
6677
6678 // Single-precision floating-point values are mapped to the
6679 // second (rightmost) word of the stack doubleword.
6680 if (Arg.getValueType() == MVT::f32 &&
6681 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6682 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6683 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6684 }
6685
6686 assert(HasParameterArea &&
6687 "Parameter area must exist to pass an argument in memory.");
6688 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6689 true, CFlags.IsTailCall, false, MemOpChains,
6690 TailCallArguments, dl);
6691
6692 NeededLoad = true;
6693 }
6694 // When passing an array of floats, the array occupies consecutive
6695 // space in the argument area; only round up to the next doubleword
6696 // at the end of the array. Otherwise, each float takes 8 bytes.
6697 if (!IsFastCall || NeededLoad) {
6698 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6699 Flags.isInConsecutiveRegs()) ? 4 : 8;
6700 if (Flags.isInConsecutiveRegsLast())
6701 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6702 }
6703 break;
6704 }
6705 case MVT::v4f32:
6706 case MVT::v4i32:
6707 case MVT::v8i16:
6708 case MVT::v16i8:
6709 case MVT::v2f64:
6710 case MVT::v2i64:
6711 case MVT::v1i128:
6712 case MVT::f128:
6713 // These can be scalar arguments or elements of a vector array type
6714 // passed directly. The latter are used to implement ELFv2 homogenous
6715 // vector aggregates.
6716
6717 // For a varargs call, named arguments go into VRs or on the stack as
6718 // usual; unnamed arguments always go to the stack or the corresponding
6719 // GPRs when within range. For now, we always put the value in both
6720 // locations (or even all three).
6721 if (CFlags.IsVarArg) {
6722 assert(HasParameterArea &&
6723 "Parameter area must exist if we have a varargs call.");
6724 // We could elide this store in the case where the object fits
6725 // entirely in R registers. Maybe later.
6726 SDValue Store =
6727 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6728 MemOpChains.push_back(Store);
6729 if (VR_idx != NumVRs) {
6730 SDValue Load =
6731 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6732 MemOpChains.push_back(Load.getValue(1));
6733 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6734 }
6735 ArgOffset += 16;
6736 for (unsigned i=0; i<16; i+=PtrByteSize) {
6737 if (GPR_idx == NumGPRs)
6738 break;
6739 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6740 DAG.getConstant(i, dl, PtrVT));
6741 SDValue Load =
6742 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6743 MemOpChains.push_back(Load.getValue(1));
6744 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6745 }
6746 break;
6747 }
6748
6749 // Non-varargs Altivec params go into VRs or on the stack.
6750 if (VR_idx != NumVRs) {
6751 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6752 } else {
6753 if (IsFastCall)
6754 ComputePtrOff();
6755
6756 assert(HasParameterArea &&
6757 "Parameter area must exist to pass an argument in memory.");
6758 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6759 true, CFlags.IsTailCall, true, MemOpChains,
6760 TailCallArguments, dl);
6761 if (IsFastCall)
6762 ArgOffset += 16;
6763 }
6764
6765 if (!IsFastCall)
6766 ArgOffset += 16;
6767 break;
6768 }
6769 }
6770
6771 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6772 "mismatch in size of parameter area");
6773 (void)NumBytesActuallyUsed;
6774
6775 if (!MemOpChains.empty())
6776 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6777
6778 // Check if this is an indirect call (MTCTR/BCTRL).
6779 // See prepareDescriptorIndirectCall and buildCallOperands for more
6780 // information about calls through function pointers in the 64-bit SVR4 ABI.
6781 if (CFlags.IsIndirect) {
6782 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6783 // caller in the TOC save area.
6784 if (isTOCSaveRestoreRequired(Subtarget)) {
6785 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6786 // Load r2 into a virtual register and store it to the TOC save area.
6787 setUsesTOCBasePtr(DAG);
6788 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6789 // TOC save area offset.
6790 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6791 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6792 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6793 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6795 DAG.getMachineFunction(), TOCSaveOffset));
6796 }
6797 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6798 // This does not mean the MTCTR instruction must use R12; it's easier
6799 // to model this as an extra parameter, so do that.
6800 if (isELFv2ABI && !CFlags.IsPatchPoint)
6801 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6802 }
6803
6804 // Build a sequence of copy-to-reg nodes chained together with token chain
6805 // and flag operands which copy the outgoing args into the appropriate regs.
6806 SDValue InGlue;
6807 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6808 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6809 RegsToPass[i].second, InGlue);
6810 InGlue = Chain.getValue(1);
6811 }
6812
6813 if (CFlags.IsTailCall && !IsSibCall)
6814 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6815 TailCallArguments);
6816
6817 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6818 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6819}
6820
6821// Returns true when the shadow of a general purpose argument register
6822// in the parameter save area is aligned to at least 'RequiredAlign'.
6823static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6824 assert(RequiredAlign.value() <= 16 &&
6825 "Required alignment greater than stack alignment.");
6826 switch (Reg) {
6827 default:
6828 report_fatal_error("called on invalid register.");
6829 case PPC::R5:
6830 case PPC::R9:
6831 case PPC::X3:
6832 case PPC::X5:
6833 case PPC::X7:
6834 case PPC::X9:
6835 // These registers are 16 byte aligned which is the most strict aligment
6836 // we can support.
6837 return true;
6838 case PPC::R3:
6839 case PPC::R7:
6840 case PPC::X4:
6841 case PPC::X6:
6842 case PPC::X8:
6843 case PPC::X10:
6844 // The shadow of these registers in the PSA is 8 byte aligned.
6845 return RequiredAlign <= 8;
6846 case PPC::R4:
6847 case PPC::R6:
6848 case PPC::R8:
6849 case PPC::R10:
6850 return RequiredAlign <= 4;
6851 }
6852}
6853
// CC_AIX - Custom calling-convention hook implementing the AIX parameter
// passing rules for a single argument. Each argument both consumes parameter
// save area (PSA) stack space and, when available, argument registers; the
// resulting locations are recorded on the AIXCCState. Returns false once the
// argument has been assigned (the CCState convention for "handled").
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &S) {
  AIXCCState &State = static_cast<AIXCCState &>(S);
  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrSize = IsPPC64 ? 8 : 4;
  const Align PtrAlign(PtrSize);
  const Align StackAlign(16);
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  // The AIX ABI argument GPR sequence: r3-r10 (x3-x10 in 64-bit mode).
  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  static const MCPhysReg VR[] = {// Vector registers.
                                 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
                                 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};

  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

  if (ArgFlags.isByVal()) {
    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
    if (ByValAlign > StackAlign)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "16 are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();
    // By-val aggregates are at least pointer aligned in the PSA.
    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
                                       State.getStackSize(), RegVT, LocInfo));
      return false;
    }

    // Shadow allocate any registers that are not properly aligned.
    unsigned NextReg = State.getFirstUnallocated(GPRs);
    while (NextReg != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
      // Shadow allocate next registers since its alignment is not strict
      // enough.
      MCRegister Reg = State.AllocateReg(GPRs);
      // Allocate the stack space shadowed by said register.
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Alocating register unexpectedly failed.");
      (void)Reg;
      NextReg = State.getFirstUnallocated(GPRs);
    }

    // Consume PSA space for the whole aggregate, then pair each pointer-sized
    // piece with a GPR while any remain.
    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
      if (MCRegister Reg = State.AllocateReg(GPRs))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
                                 LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    [[fallthrough]];
  case MVT::i1:
  case MVT::i32: {
    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
    if (MCRegister Reg = State.AllocateReg(GPRs))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
    MCRegister FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
      if (MCRegister Reg = State.AllocateReg(GPRs)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
  case MVT::v2i64:
  case MVT::v2f64:
  case MVT::v1i128: {
    const unsigned VecSize = 16;
    const Align VecAlign(VecSize);

    if (!State.isVarArg()) {
      // If there are vector registers remaining we don't consume any stack
      // space.
      if (MCRegister VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        return false;
      }
      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
      // might be allocated in the portion of the PSA that is shadowed by the
      // GPRs.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
    // Burn any underaligned registers and their shadowed stack space until
    // we reach the required alignment.
    while (NextRegIndex != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
      // Shadow allocate register and its stack shadow.
      MCRegister Reg = State.AllocateReg(GPRs);
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Allocating register unexpectedly failed.");
      (void)Reg;
      NextRegIndex = State.getFirstUnallocated(GPRs);
    }

    // Vectors that are passed as fixed arguments are handled differently.
    // They are passed in VRs if any are available (unlike arguments passed
    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
    // functions)
    if (State.isFixed(ValNo)) {
      if (MCRegister VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        // Shadow allocate GPRs and stack space even though we pass in a VR.
        for (unsigned I = 0; I != VecSize; I += PtrSize)
          State.AllocateReg(GPRs);
        State.AllocateStack(VecSize, VecAlign);
        return false;
      }
      // No vector registers remain so pass on the stack.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // If all GPRS are consumed then we pass the argument fully on the stack.
    if (NextRegIndex == GPRs.size()) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // Corner case for 32-bit codegen. We have 2 registers to pass the first
    // half of the argument, and then need to pass the remaining half on the
    // stack.
    if (GPRs[NextRegIndex] == PPC::R9) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));

      const MCRegister FirstReg = State.AllocateReg(PPC::R9);
      const MCRegister SecondReg = State.AllocateReg(PPC::R10);
      assert(FirstReg && SecondReg &&
             "Allocating R9 or R10 unexpectedly failed.");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
      return false;
    }

    // We have enough GPRs to fully pass the vector argument, and we have
    // already consumed any underaligned registers. Start with the custom
    // MemLoc and then the custom RegLocs.
    const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
    State.addLoc(
        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    for (unsigned I = 0; I != VecSize; I += PtrSize) {
      const MCRegister Reg = State.AllocateReg(GPRs);
      assert(Reg && "Failed to allocated register for vararg vector argument");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    }
    return false;
  }
  }
  // Not reached for the types handled above; signals an unassigned argument.
  return true;
}
7094
// So far, this function is only used by LowerFormalArguments_AIX()
// Maps a formal argument's simple value type to the register class that
// holds it, selecting wider VSX-capable classes for scalar floats when the
// subtarget provides them.
                                  bool IsPPC64,
                                  bool HasP8Vector,
                                  bool HasVSX) {
  // An i64 argument is only legal when compiling for 64-bit.
  assert((IsPPC64 || SVT != MVT::i64) &&
         "i64 should have been split for 32-bit codegen.");

  switch (SVT) {
  default:
    report_fatal_error("Unexpected value type for formal argument");
  case MVT::i1:
  case MVT::i32:
  case MVT::i64:
    // Integers use 64-bit GPRs on PPC64 and 32-bit GPRs otherwise.
    return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  case MVT::f32:
    return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
  case MVT::f64:
    return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
  case MVT::v2i64:
  case MVT::v2f64:
  case MVT::v1i128:
    // All supported vector types map to the AltiVec register class.
    return &PPC::VRRCRegClass;
  }
}
7124
                                        SelectionDAG &DAG, SDValue ArgValue,
                                        MVT LocVT, const SDLoc &dl) {
  // Narrows an integer argument passed in a wider location type back down to
  // its declared value type. Both types must be scalar integers and the value
  // type strictly narrower than the location type.
  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());

  // Record the extension kind indicated by the argument flags so later DAG
  // combines can rely on the upper bits being sign/zero extended.
  if (Flags.isSExt())
    ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
                           DAG.getValueType(ValVT));
  else if (Flags.isZExt())
    ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
                           DAG.getValueType(ValVT));

  // With no extension flag, the value is simply truncated.
  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
7140
7141static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7142 const unsigned LASize = FL->getLinkageSize();
7143
7144 if (PPC::GPRCRegClass.contains(Reg)) {
7145 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7146 "Reg must be a valid argument register!");
7147 return LASize + 4 * (Reg - PPC::R3);
7148 }
7149
7150 if (PPC::G8RCRegClass.contains(Reg)) {
7151 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7152 "Reg must be a valid argument register!");
7153 return LASize + 8 * (Reg - PPC::X3);
7154 }
7155
7156 llvm_unreachable("Only general purpose registers expected.");
7157}
7158
7159// AIX ABI Stack Frame Layout:
7160//
7161// Low Memory +--------------------------------------------+
7162// SP +---> | Back chain | ---+
7163// | +--------------------------------------------+ |
7164// | | Saved Condition Register | |
7165// | +--------------------------------------------+ |
7166// | | Saved Linkage Register | |
7167// | +--------------------------------------------+ | Linkage Area
7168// | | Reserved for compilers | |
7169// | +--------------------------------------------+ |
7170// | | Reserved for binders | |
7171// | +--------------------------------------------+ |
7172// | | Saved TOC pointer | ---+
7173// | +--------------------------------------------+
7174// | | Parameter save area |
7175// | +--------------------------------------------+
7176// | | Alloca space |
7177// | +--------------------------------------------+
7178// | | Local variable space |
7179// | +--------------------------------------------+
7180// | | Float/int conversion temporary |
7181// | +--------------------------------------------+
7182// | | Save area for AltiVec registers |
7183// | +--------------------------------------------+
7184// | | AltiVec alignment padding |
7185// | +--------------------------------------------+
7186// | | Save area for VRSAVE register |
7187// | +--------------------------------------------+
7188// | | Save area for General Purpose registers |
7189// | +--------------------------------------------+
7190// | | Save area for Floating Point registers |
7191// | +--------------------------------------------+
7192// +---- | Back chain |
7193// High Memory +--------------------------------------------+
7194//
7195// Specifications:
7196// AIX 7.2 Assembler Language Reference
7197// Subroutine linkage convention
7198
// Lower incoming formal arguments according to the AIX ABI (see the stack
// frame layout diagram above). For each incoming argument this appends one
// SDValue to InVals and returns the (possibly updated) token chain.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // Only the C-family calling conventions are supported on AIX.
  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  uint64_t SaveStackPos = CCInfo.getStackSize();
  // "save-reg-params" requests that register arguments also be stored to
  // their parameter-save-area slots (e.g. so a debugger can find them).
  bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);


  // Walk the CCValAssign list. Custom locations may consume several entries
  // per source argument, so the index is advanced inside the loop body.
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    MVT ValVT = VA.getValVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register. The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
      continue;

    // save-reg-params: spill this register argument to its save-area slot.
    if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
      const TargetRegisterClass *RegClass = getRegClassForSVT(
          LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
      // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
      MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
      const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
      SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
      int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
                                      MachinePointerInfo(), Align(PtrByteSize));
      SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
      MemOps.push_back(StoreReg);
    }

    // Keep SaveStackPos in sync for arguments that already live in memory
    // (or by-vals): account for the space they occupy without storing.
    if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
      unsigned StoreSize =
          Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
      SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
    }

    // Load an argument value from its fixed stack slot into InVals.
    auto HandleMemLoc = [&]() {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
    };

    // Vector arguments to VaArg functions are passed both on the stack, and
    // in any available GPRs. Load the value from the stack and add the GPRs
    // as live ins.
    if (VA.isMemLoc() && VA.needsCustom()) {
      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
      assert(isVarArg && "Only use custom memloc for vararg.");
      // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
      // matching custom RegLocs.
      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;

      // Consume one custom RegLoc paired with this MemLoc and mark its
      // register as a live-in (the value itself is loaded from memory).
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");
        VA = ArgLocs[I++];
        assert(VA.getValVT().isVector() &&
               "Unexpected Val type for custom RegLoc.");
        assert(VA.getValNo() == OriginalValNo &&
               "ValNo mismatch between custom MemLoc and RegLoc.");
        MF.addLiveIn(VA.getLocReg(),
                     getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
                                       Subtarget.hasVSX()));
      };

      HandleMemLoc();
      // In 64-bit there will be exactly 2 custom RegLocs that follow, and
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
      // we passed the vector in R5, R6, R7 and R8.
      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    // Record the parameter's type class (per value type) for register
    // arguments; unknown types are a hard error.
    if (VA.isRegLoc()) {
      if (VA.getValVT().isScalarInteger())
      else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error("Unhandled value type for argument.");
        case MVT::f32:
          break;
        case MVT::f64:
          break;
        }
      } else if (VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error("Unhandled value type for argument.");
        case MVT::v16i8:
          break;
        case MVT::v8i16:
          break;
        case MVT::v4i32:
        case MVT::v2i64:
        case MVT::v1i128:
          break;
        case MVT::v4f32:
        case MVT::v2f64:
          break;
        }
      }
    }

    // By-val argument fully in memory: hand back the address of its slot.
    if (Flags.isByVal() && VA.isMemLoc()) {
      // Zero-sized by-vals still get a pointer-sized (aliased) slot.
      const unsigned Size =
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      continue;
    }

    // By-val argument (at least partially) in registers: store the registers
    // into the reserved parameter-save-area slots and return that address.
    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const Register VReg = MF.addLiveIn(PhysReg, RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixedstack object is needed because accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        SDValue Store = DAG.getStore(
            CopyFrom.getValue(1), dl, CopyFrom,

        MemOps.push_back(Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      // Consume any further RegLocs belonging to this same by-val argument.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    // Plain register argument: mark live-in and copy out of the register,
    // truncating narrow integers back to their declared value type.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.SimpleTy;
      Register VReg =
          MF.addLiveIn(VA.getLocReg(),
                       getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
                                         Subtarget.hasVSX()));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
      if (ValVT.isScalarInteger() &&
          (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(ArgValue);
      continue;
    }
    if (VA.isMemLoc()) {
      HandleMemLoc();
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea = std::max<unsigned>(
      CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex =
             (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const Register VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Merge all argument stores into the chain with a single TokenFactor.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
7520
// Lower an outgoing call under the AIX ABI: analyze operand locations,
// materialize register copies and stack stores for the arguments, save the
// TOC for indirect calls, and emit the call via FinishCall.
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  // Walk the assigned locations; by-val and custom arguments may consume
  // several CCValAssign entries, so the index advances inside the body.
  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Zero-extending load of a VT-sized piece of the by-val object,
      // LoadOffset bytes from its start.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        // Largest power-of-2 chunk that still fits in the residue.
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        // OR the shifted chunks together into the final register value.
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    // Promote the value to its location type if the CC requires it.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack
      unsigned LoadOffset = 0;
      // Reload one GPR-sized piece of the stored vector and pass it in the
      // next custom register location.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      // 32-bit only: up to 2 more custom RegLocs (vector split across
      // R5..R8).
      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

      continue;
    }

    if (!ValVT.isFloatingPoint())
          "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // NOTE(review): the second conjunct compares PeekArg.getValNo() with
        // itself and is therefore always true; it presumably intends to
        // compare against the current argument's ValNo — confirm.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          // Low 32 bits of the f64 go in the second GPR.
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    // Store the TOC pointer into its linkage-area save slot.
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
7841
// Return true if the function's return values can be lowered entirely in
// registers under the given calling convention (CheckReturn runs the
// return-CC analysis without emitting any code).
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  LLVMContext &Context) const {
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  // SVR4 cold calls use a dedicated return convention; everything else
  // uses the common PPC return convention.
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                : RetCC_PPC);
}
7854
// Lower the function's return: copy each return value into its assigned
// register(s) and emit a PPCISD::RET_GLUE node over the collected operands.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // SVR4 cold calls use a dedicated return convention.
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           : RetCC_PPC);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx indexes OutVals; it lags i when one source value occupies two
  // register locations (the SPE f64 case below consumes an extra RVLoc).
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Extend the value to its location type if the CC requires it.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
7919
7920SDValue
7921PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7922 SelectionDAG &DAG) const {
7923 SDLoc dl(Op);
7924
7925 // Get the correct type for integers.
7926 EVT IntVT = Op.getValueType();
7927
7928 // Get the inputs.
7929 SDValue Chain = Op.getOperand(0);
7930 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7931 // Build a DYNAREAOFFSET node.
7932 SDValue Ops[2] = {Chain, FPSIdx};
7933 SDVTList VTs = DAG.getVTList(IntVT);
7934 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7935}
7936
7937SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7938 SelectionDAG &DAG) const {
7939 // When we pop the dynamic allocation we need to restore the SP link.
7940 SDLoc dl(Op);
7941
7942 // Get the correct type for pointers.
7943 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7944
7945 // Construct the stack pointer operand.
7946 bool isPPC64 = Subtarget.isPPC64();
7947 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7948 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7949
7950 // Get the operands for the STACKRESTORE.
7951 SDValue Chain = Op.getOperand(0);
7952 SDValue SaveSP = Op.getOperand(1);
7953
7954 // Load the old link SP.
7955 SDValue LoadLinkSP =
7956 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7957
7958 // Restore the stack pointer.
7959 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7960
7961 // Store the old link SP.
7962 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7963}
7964
// Return a frame index addressing the return-address (LR) save slot,
// creating the fixed stack object lazily on first use and caching it on the
// function info.
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index, if already created.
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}
7986
// Return a frame index addressing the frame-pointer save slot, creating the
// fixed stack object lazily on first use and caching it on the function
// info.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get current frame pointer save index. The users of this index will be
  // primarily DYNALLOC instructions.
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}
8009
// Lower dynamic stack allocation (variable-size alloca) to the PPC
// DYNALLOC pseudo, or PROBED_ALLOCA when inline stack probing is enabled.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size: the pseudo takes the (negative) SP adjustment amount.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  // Use the probing variant when inline stack probes are required.
  if (hasInlineStackProbe(MF))
    return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
8031
// Lower llvm.eh.dwarf.cfa: expose the canonical frame address as a
// pointer-sized fixed stack object at offset 0 of the current frame.
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // A pointer-sized fixed object at offset 0 designates the CFA.
  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}
8042
8043SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8044 SelectionDAG &DAG) const {
8045 SDLoc DL(Op);
8046 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8047 DAG.getVTList(MVT::i32, MVT::Other),
8048 Op.getOperand(0), Op.getOperand(1));
8049}
8050
8051SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8052 SelectionDAG &DAG) const {
8053 SDLoc DL(Op);
8054 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8055 Op.getOperand(0), Op.getOperand(1));
8056}
8057
8058SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8059 if (Op.getValueType().isVector())
8060 return LowerVectorLoad(Op, DAG);
8061
8062 assert(Op.getValueType() == MVT::i1 &&
8063 "Custom lowering only for i1 loads");
8064
8065 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8066
8067 SDLoc dl(Op);
8069
8070 SDValue Chain = LD->getChain();
8071 SDValue BasePtr = LD->getBasePtr();
8072 MachineMemOperand *MMO = LD->getMemOperand();
8073
8074 SDValue NewLD =
8075 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8076 BasePtr, MVT::i8, MMO);
8077 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8078
8079 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8080 return DAG.getMergeValues(Ops, dl);
8081}
8082
8083SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8084 if (Op.getOperand(1).getValueType().isVector())
8085 return LowerVectorStore(Op, DAG);
8086
8087 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8088 "Custom lowering only for i1 stores");
8089
8090 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8091
8092 SDLoc dl(Op);
8094
8095 SDValue Chain = ST->getChain();
8096 SDValue BasePtr = ST->getBasePtr();
8097 SDValue Value = ST->getValue();
8098 MachineMemOperand *MMO = ST->getMemOperand();
8099
8101 Value);
8102 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8103}
8104
8105// FIXME: Remove this once the ANDI glue bug is fixed:
8106SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8107 assert(Op.getValueType() == MVT::i1 &&
8108 "Custom lowering only for i1 results");
8109
8110 SDLoc DL(Op);
8111 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8112}
8113
8114SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8115 SelectionDAG &DAG) const {
8116
8117 // Implements a vector truncate that fits in a vector register as a shuffle.
8118 // We want to legalize vector truncates down to where the source fits in
8119 // a vector register (and target is therefore smaller than vector register
8120 // size). At that point legalization will try to custom lower the sub-legal
8121 // result and get here - where we can contain the truncate as a single target
8122 // operation.
8123
8124 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8125 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8126 //
8127 // We will implement it for big-endian ordering as this (where x denotes
8128 // undefined):
8129 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8130 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8131 //
8132 // The same operation in little-endian ordering will be:
8133 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8134 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8135
8136 EVT TrgVT = Op.getValueType();
8137 assert(TrgVT.isVector() && "Vector type expected.");
8138 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8139 EVT EltVT = TrgVT.getVectorElementType();
8140 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8141 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8143 return SDValue();
8144
8145 SDValue N1 = Op.getOperand(0);
8146 EVT SrcVT = N1.getValueType();
8147 unsigned SrcSize = SrcVT.getSizeInBits();
8148 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8151 return SDValue();
8152 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8153 return SDValue();
8154
8155 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8156 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8157
8158 SDLoc DL(Op);
8159 SDValue Op1, Op2;
8160 if (SrcSize == 256) {
8161 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8162 EVT SplitVT =
8164 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8165 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8166 DAG.getConstant(0, DL, VecIdxTy));
8167 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8168 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8169 }
8170 else {
8171 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8172 Op2 = DAG.getUNDEF(WideVT);
8173 }
8174
8175 // First list the elements we want to keep.
8176 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8177 SmallVector<int, 16> ShuffV;
8178 if (Subtarget.isLittleEndian())
8179 for (unsigned i = 0; i < TrgNumElts; ++i)
8180 ShuffV.push_back(i * SizeMult);
8181 else
8182 for (unsigned i = 1; i <= TrgNumElts; ++i)
8183 ShuffV.push_back(i * SizeMult - 1);
8184
8185 // Populate the remaining elements with undefs.
8186 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8187 // ShuffV.push_back(i + WideNumElts);
8188 ShuffV.push_back(WideNumElts + 1);
8189
8190 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8191 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8192 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8193}
8194
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
        LHS, RHS, CC);
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
      ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  // Note: fsel natively selects on (operand >= 0.0). Each case below
  // rewrites the requested condition into one or two such tests.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      [[fallthrough]];
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // Equality needs two fsels: (LHS >= 0 ? TV : FV) gated again by
      // (-LHS >= 0), so only LHS == 0.0 yields TV.
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize the comparison as a subtraction (LHS - RHS or
  // RHS - LHS, depending on the condition) and select on its sign via fsel.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
8327
8328static unsigned getPPCStrictOpcode(unsigned Opc) {
8329 switch (Opc) {
8330 default:
8331 llvm_unreachable("No strict version of this opcode!");
8332 case PPCISD::FCTIDZ:
8333 return PPCISD::STRICT_FCTIDZ;
8334 case PPCISD::FCTIWZ:
8335 return PPCISD::STRICT_FCTIWZ;
8336 case PPCISD::FCTIDUZ:
8338 case PPCISD::FCTIWUZ:
8340 case PPCISD::FCFID:
8341 return PPCISD::STRICT_FCFID;
8342 case PPCISD::FCFIDU:
8343 return PPCISD::STRICT_FCFIDU;
8344 case PPCISD::FCFIDS:
8345 return PPCISD::STRICT_FCFIDS;
8346 case PPCISD::FCFIDUS:
8348 }
8349}
8350
8352 const PPCSubtarget &Subtarget) {
8353 SDLoc dl(Op);
8354 bool IsStrict = Op->isStrictFPOpcode();
8355 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8356 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8357
8358 // TODO: Any other flags to propagate?
8359 SDNodeFlags Flags;
8360 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8361
8362 // For strict nodes, source is the second operand.
8363 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8364 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8365 MVT DestTy = Op.getSimpleValueType();
8366 assert(Src.getValueType().isFloatingPoint() &&
8367 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8368 DestTy == MVT::i64) &&
8369 "Invalid FP_TO_INT types");
8370 if (Src.getValueType() == MVT::f32) {
8371 if (IsStrict) {
8372 Src =
8374 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8375 Chain = Src.getValue(1);
8376 } else
8377 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8378 }
8379 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8380 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8381 unsigned Opc = ISD::DELETED_NODE;
8382 switch (DestTy.SimpleTy) {
8383 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8384 case MVT::i32:
8385 Opc = IsSigned ? PPCISD::FCTIWZ
8386 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8387 break;
8388 case MVT::i64:
8389 assert((IsSigned || Subtarget.hasFPCVT()) &&
8390 "i64 FP_TO_UINT is supported only with FPCVT");
8391 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8392 }
8393 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8394 SDValue Conv;
8395 if (IsStrict) {
8396 Opc = getPPCStrictOpcode(Opc);
8397 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8398 Flags);
8399 } else {
8400 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8401 }
8402 return Conv;
8403}
8404
8405void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8406 SelectionDAG &DAG,
8407 const SDLoc &dl) const {
8408 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8409 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8410 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8411 bool IsStrict = Op->isStrictFPOpcode();
8412
8413 // Convert the FP value to an int value through memory.
8414 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8415 (IsSigned || Subtarget.hasFPCVT());
8416 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8417 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8418 MachinePointerInfo MPI =
8420
8421 // Emit a store to the stack slot.
8422 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8423 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8424 if (i32Stack) {
8426 Alignment = Align(4);
8427 MachineMemOperand *MMO =
8428 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8429 SDValue Ops[] = { Chain, Tmp, FIPtr };
8430 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8431 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8432 } else
8433 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8434
8435 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8436 // add in a bias on big endian.
8437 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8438 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8439 DAG.getConstant(4, dl, FIPtr.getValueType()));
8440 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8441 }
8442
8443 RLI.Chain = Chain;
8444 RLI.Ptr = FIPtr;
8445 RLI.MPI = MPI;
8446 RLI.Alignment = Alignment;
8447}
8448
8449/// Custom lowers floating point to integer conversions to use
8450/// the direct move instructions available in ISA 2.07 to avoid the
8451/// need for load/store combinations.
8452SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8453 SelectionDAG &DAG,
8454 const SDLoc &dl) const {
8455 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8456 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8457 if (Op->isStrictFPOpcode())
8458 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8459 else
8460 return Mov;
8461}
8462
8463SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8464 const SDLoc &dl) const {
8465 bool IsStrict = Op->isStrictFPOpcode();
8466 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8467 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8468 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8469 EVT SrcVT = Src.getValueType();
8470 EVT DstVT = Op.getValueType();
8471
8472 // FP to INT conversions are legal for f128.
8473 if (SrcVT == MVT::f128)
8474 return Subtarget.hasP9Vector() ? Op : SDValue();
8475
8476 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8477 // PPC (the libcall is not available).
8478 if (SrcVT == MVT::ppcf128) {
8479 if (DstVT == MVT::i32) {
8480 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8481 // set other fast-math flags to FP operations in both strict and
8482 // non-strict cases. (FP_TO_SINT, FSUB)
8483 SDNodeFlags Flags;
8484 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8485
8486 if (IsSigned) {
8487 SDValue Lo, Hi;
8488 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8489
8490 // Add the two halves of the long double in round-to-zero mode, and use
8491 // a smaller FP_TO_SINT.
8492 if (IsStrict) {
8494 DAG.getVTList(MVT::f64, MVT::Other),
8495 {Op.getOperand(0), Lo, Hi}, Flags);
8496 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8497 DAG.getVTList(MVT::i32, MVT::Other),
8498 {Res.getValue(1), Res}, Flags);
8499 } else {
8500 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8501 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8502 }
8503 } else {
8504 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8505 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8506 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8507 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8508 if (IsStrict) {
8509 // Sel = Src < 0x80000000
8510 // FltOfs = select Sel, 0.0, 0x80000000
8511 // IntOfs = select Sel, 0, 0x80000000
8512 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8513 SDValue Chain = Op.getOperand(0);
8514 EVT SetCCVT =
8515 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8516 EVT DstSetCCVT =
8517 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8518 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8519 Chain, true);
8520 Chain = Sel.getValue(1);
8521
8522 SDValue FltOfs = DAG.getSelect(
8523 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8524 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8525
8526 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8527 DAG.getVTList(SrcVT, MVT::Other),
8528 {Chain, Src, FltOfs}, Flags);
8529 Chain = Val.getValue(1);
8530 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8531 DAG.getVTList(DstVT, MVT::Other),
8532 {Chain, Val}, Flags);
8533 Chain = SInt.getValue(1);
8534 SDValue IntOfs = DAG.getSelect(
8535 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8536 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8537 return DAG.getMergeValues({Result, Chain}, dl);
8538 } else {
8539 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8540 // FIXME: generated code sucks.
8541 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8542 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8543 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8544 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8545 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8546 }
8547 }
8548 }
8549
8550 return SDValue();
8551 }
8552
8553 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8554 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8555
8556 ReuseLoadInfo RLI;
8557 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8558
8559 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8560 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8561}
8562
8563// We're trying to insert a regular store, S, and then a load, L. If the
8564// incoming value, O, is a load, we might just be able to have our load use the
8565// address used by O. However, we don't know if anything else will store to
8566// that address before we can load from it. To prevent this situation, we need
8567// to insert our load, L, into the chain as a peer of O. To do this, we give L
8568// the same chain operand as O, we create a token factor from the chain results
8569// of O and L, and we replace all uses of O's chain result with that token
8570// factor (see spliceIntoChain below for this last part).
8571bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8572 ReuseLoadInfo &RLI,
8573 SelectionDAG &DAG,
8574 ISD::LoadExtType ET) const {
8575 // Conservatively skip reusing for constrained FP nodes.
8576 if (Op->isStrictFPOpcode())
8577 return false;
8578
8579 SDLoc dl(Op);
8580 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8581 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8582 if (ET == ISD::NON_EXTLOAD &&
8583 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8584 isOperationLegalOrCustom(Op.getOpcode(),
8585 Op.getOperand(0).getValueType())) {
8586
8587 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8588 return true;
8589 }
8590
8592 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8593 LD->isNonTemporal())
8594 return false;
8595 if (LD->getMemoryVT() != MemVT)
8596 return false;
8597
8598 // If the result of the load is an illegal type, then we can't build a
8599 // valid chain for reuse since the legalised loads and token factor node that
8600 // ties the legalised loads together uses a different output chain then the
8601 // illegal load.
8602 if (!isTypeLegal(LD->getValueType(0)))
8603 return false;
8604
8605 RLI.Ptr = LD->getBasePtr();
8606 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8607 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8608 "Non-pre-inc AM on PPC?");
8609 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8610 LD->getOffset());
8611 }
8612
8613 RLI.Chain = LD->getChain();
8614 RLI.MPI = LD->getPointerInfo();
8615 RLI.IsDereferenceable = LD->isDereferenceable();
8616 RLI.IsInvariant = LD->isInvariant();
8617 RLI.Alignment = LD->getAlign();
8618 RLI.AAInfo = LD->getAAInfo();
8619 RLI.Ranges = LD->getRanges();
8620
8621 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8622 return true;
8623}
8624
8625// Given the head of the old chain, ResChain, insert a token factor containing
8626// it and NewResChain, and make users of ResChain now be users of that token
8627// factor.
8628// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8629void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8630 SDValue NewResChain,
8631 SelectionDAG &DAG) const {
8632 if (!ResChain)
8633 return;
8634
8635 SDLoc dl(NewResChain);
8636
8637 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8638 NewResChain, DAG.getUNDEF(MVT::Other));
8639 assert(TF.getNode() != NewResChain.getNode() &&
8640 "A new TF really is required here");
8641
8642 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8643 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8644}
8645
8646/// Analyze profitability of direct move
8647/// prefer float load to int load plus direct move
8648/// when there is no integer use of int load
8649bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8650 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8651 if (Origin->getOpcode() != ISD::LOAD)
8652 return true;
8653
8654 // If there is no LXSIBZX/LXSIHZX, like Power8,
8655 // prefer direct move if the memory size is 1 or 2 bytes.
8656 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8657 if (!Subtarget.hasP9Vector() &&
8658 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8659 return true;
8660
8661 for (SDNode::use_iterator UI = Origin->use_begin(),
8662 UE = Origin->use_end();
8663 UI != UE; ++UI) {
8664
8665 // Only look at the users of the loaded value.
8666 if (UI.getUse().get().getResNo() != 0)
8667 continue;
8668
8669 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8670 UI->getOpcode() != ISD::UINT_TO_FP &&
8671 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8672 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8673 return true;
8674 }
8675
8676 return false;
8677}
8678
8680 const PPCSubtarget &Subtarget,
8681 SDValue Chain = SDValue()) {
8682 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8683 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8684 SDLoc dl(Op);
8685
8686 // TODO: Any other flags to propagate?
8687 SDNodeFlags Flags;
8688 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8689
8690 // If we have FCFIDS, then use it when converting to single-precision.
8691 // Otherwise, convert to double-precision and then round.
8692 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8693 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8694 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8695 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8696 if (Op->isStrictFPOpcode()) {
8697 if (!Chain)
8698 Chain = Op.getOperand(0);
8699 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8700 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8701 } else
8702 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8703}
8704
8705/// Custom lowers integer to floating point conversions to use
8706/// the direct move instructions available in ISA 2.07 to avoid the
8707/// need for load/store combinations.
8708SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8709 SelectionDAG &DAG,
8710 const SDLoc &dl) const {
8711 assert((Op.getValueType() == MVT::f32 ||
8712 Op.getValueType() == MVT::f64) &&
8713 "Invalid floating point type as target of conversion");
8714 assert(Subtarget.hasFPCVT() &&
8715 "Int to FP conversions with direct moves require FPCVT");
8716 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8717 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8718 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8719 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8720 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8721 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8722 return convertIntToFP(Op, Mov, DAG, Subtarget);
8723}
8724
8725static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8726
8727 EVT VecVT = Vec.getValueType();
8728 assert(VecVT.isVector() && "Expected a vector type.");
8729 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8730
8731 EVT EltVT = VecVT.getVectorElementType();
8732 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8733 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8734
8735 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8736 SmallVector<SDValue, 16> Ops(NumConcat);
8737 Ops[0] = Vec;
8738 SDValue UndefVec = DAG.getUNDEF(VecVT);
8739 for (unsigned i = 1; i < NumConcat; ++i)
8740 Ops[i] = UndefVec;
8741
8742 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8743}
8744
8745SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8746 const SDLoc &dl) const {
8747 bool IsStrict = Op->isStrictFPOpcode();
8748 unsigned Opc = Op.getOpcode();
8749 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8750 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8752 "Unexpected conversion type");
8753 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8754 "Supports conversions to v2f64/v4f32 only.");
8755
8756 // TODO: Any other flags to propagate?
8757 SDNodeFlags Flags;
8758 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8759
8760 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8761 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8762
8763 SDValue Wide = widenVec(DAG, Src, dl);
8764 EVT WideVT = Wide.getValueType();
8765 unsigned WideNumElts = WideVT.getVectorNumElements();
8766 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8767
8768 SmallVector<int, 16> ShuffV;
8769 for (unsigned i = 0; i < WideNumElts; ++i)
8770 ShuffV.push_back(i + WideNumElts);
8771
8772 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8773 int SaveElts = FourEltRes ? 4 : 2;
8774 if (Subtarget.isLittleEndian())
8775 for (int i = 0; i < SaveElts; i++)
8776 ShuffV[i * Stride] = i;
8777 else
8778 for (int i = 1; i <= SaveElts; i++)
8779 ShuffV[i * Stride - 1] = i - 1;
8780
8781 SDValue ShuffleSrc2 =
8782 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8783 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8784
8785 SDValue Extend;
8786 if (SignedConv) {
8787 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8788 EVT ExtVT = Src.getValueType();
8789 if (Subtarget.hasP9Altivec())
8790 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8791 IntermediateVT.getVectorNumElements());
8792
8793 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8794 DAG.getValueType(ExtVT));
8795 } else
8796 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8797
8798 if (IsStrict)
8799 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8800 {Op.getOperand(0), Extend}, Flags);
8801
8802 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8803}
8804
8805SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8806 SelectionDAG &DAG) const {
8807 SDLoc dl(Op);
8808 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8809 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8810 bool IsStrict = Op->isStrictFPOpcode();
8811 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8812 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8813
8814 // TODO: Any other flags to propagate?
8815 SDNodeFlags Flags;
8816 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8817
8818 EVT InVT = Src.getValueType();
8819 EVT OutVT = Op.getValueType();
8820 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8821 isOperationCustom(Op.getOpcode(), InVT))
8822 return LowerINT_TO_FPVector(Op, DAG, dl);
8823
8824 // Conversions to f128 are legal.
8825 if (Op.getValueType() == MVT::f128)
8826 return Subtarget.hasP9Vector() ? Op : SDValue();
8827
8828 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8829 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8830 return SDValue();
8831
8832 if (Src.getValueType() == MVT::i1) {
8833 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8834 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8835 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8836 if (IsStrict)
8837 return DAG.getMergeValues({Sel, Chain}, dl);
8838 else
8839 return Sel;
8840 }
8841
8842 // If we have direct moves, we can do all the conversion, skip the store/load
8843 // however, without FPCVT we can't do most conversions.
8844 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8845 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8846 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8847
8848 assert((IsSigned || Subtarget.hasFPCVT()) &&
8849 "UINT_TO_FP is supported only with FPCVT");
8850
8851 if (Src.getValueType() == MVT::i64) {
8852 SDValue SINT = Src;
8853 // When converting to single-precision, we actually need to convert
8854 // to double-precision first and then round to single-precision.
8855 // To avoid double-rounding effects during that operation, we have
8856 // to prepare the input operand. Bits that might be truncated when
8857 // converting to double-precision are replaced by a bit that won't
8858 // be lost at this stage, but is below the single-precision rounding
8859 // position.
8860 //
8861 // However, if -enable-unsafe-fp-math is in effect, accept double
8862 // rounding to avoid the extra overhead.
8863 if (Op.getValueType() == MVT::f32 &&
8864 !Subtarget.hasFPCVT() &&
8866
8867 // Twiddle input to make sure the low 11 bits are zero. (If this
8868 // is the case, we are guaranteed the value will fit into the 53 bit
8869 // mantissa of an IEEE double-precision value without rounding.)
8870 // If any of those low 11 bits were not zero originally, make sure
8871 // bit 12 (value 2048) is set instead, so that the final rounding
8872 // to single-precision gets the correct result.
8873 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8874 SINT, DAG.getConstant(2047, dl, MVT::i64));
8875 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8876 Round, DAG.getConstant(2047, dl, MVT::i64));
8877 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8878 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8879 Round, DAG.getConstant(-2048, dl, MVT::i64));
8880
8881 // However, we cannot use that value unconditionally: if the magnitude
8882 // of the input value is small, the bit-twiddling we did above might
8883 // end up visibly changing the output. Fortunately, in that case, we
8884 // don't need to twiddle bits since the original input will convert
8885 // exactly to double-precision floating-point already. Therefore,
8886 // construct a conditional to use the original value if the top 11
8887 // bits are all sign-bit copies, and use the rounded value computed
8888 // above otherwise.
8889 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8890 SINT, DAG.getConstant(53, dl, MVT::i32));
8891 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8892 Cond, DAG.getConstant(1, dl, MVT::i64));
8893 Cond = DAG.getSetCC(
8894 dl,
8895 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8896 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8897
8898 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8899 }
8900
8901 ReuseLoadInfo RLI;
8902 SDValue Bits;
8903
8905 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8906 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8907 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8908 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8909 } else if (Subtarget.hasLFIWAX() &&
8910 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8911 MachineMemOperand *MMO =
8913 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8914 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8916 DAG.getVTList(MVT::f64, MVT::Other),
8917 Ops, MVT::i32, MMO);
8918 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8919 } else if (Subtarget.hasFPCVT() &&
8920 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8921 MachineMemOperand *MMO =
8923 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8924 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8926 DAG.getVTList(MVT::f64, MVT::Other),
8927 Ops, MVT::i32, MMO);
8928 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8929 } else if (((Subtarget.hasLFIWAX() &&
8930 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8931 (Subtarget.hasFPCVT() &&
8932 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8933 SINT.getOperand(0).getValueType() == MVT::i32) {
8934 MachineFrameInfo &MFI = MF.getFrameInfo();
8935 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8936
8937 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8938 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8939
8940 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8942 DAG.getMachineFunction(), FrameIdx));
8943 Chain = Store;
8944
8945 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8946 "Expected an i32 store");
8947
8948 RLI.Ptr = FIdx;
8949 RLI.Chain = Chain;
8950 RLI.MPI =
8952 RLI.Alignment = Align(4);
8953
8954 MachineMemOperand *MMO =
8956 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8957 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8960 dl, DAG.getVTList(MVT::f64, MVT::Other),
8961 Ops, MVT::i32, MMO);
8962 Chain = Bits.getValue(1);
8963 } else
8964 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8965
8966 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8967 if (IsStrict)
8968 Chain = FP.getValue(1);
8969
8970 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8971 if (IsStrict)
8973 DAG.getVTList(MVT::f32, MVT::Other),
8974 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8975 else
8976 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8977 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8978 }
8979 return FP;
8980 }
8981
8982 assert(Src.getValueType() == MVT::i32 &&
8983 "Unhandled INT_TO_FP type in custom expander!");
8984 // Since we only generate this in 64-bit mode, we can take advantage of
8985 // 64-bit registers. In particular, sign extend the input value into the
8986 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8987 // then lfd it and fcfid it.
8989 MachineFrameInfo &MFI = MF.getFrameInfo();
8990 EVT PtrVT = getPointerTy(MF.getDataLayout());
8991
8992 SDValue Ld;
8993 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8994 ReuseLoadInfo RLI;
8995 bool ReusingLoad;
8996 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8997 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8998 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8999
9000 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9002 DAG.getMachineFunction(), FrameIdx));
9003 Chain = Store;
9004
9005 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9006 "Expected an i32 store");
9007
9008 RLI.Ptr = FIdx;
9009 RLI.Chain = Chain;
9010 RLI.MPI =
9012 RLI.Alignment = Align(4);
9013 }
9014
9015 MachineMemOperand *MMO =
9017 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9018 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9019 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9020 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9021 MVT::i32, MMO);
9022 Chain = Ld.getValue(1);
9023 if (ReusingLoad)
9024 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
9025 } else {
9026 assert(Subtarget.isPPC64() &&
9027 "i32->FP without LFIWAX supported only on PPC64");
9028
9029 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9030 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9031
9032 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9033
9034 // STD the extended value into the stack slot.
9035 SDValue Store = DAG.getStore(
9036 Chain, dl, Ext64, FIdx,
9038 Chain = Store;
9039
9040 // Load the value as a double.
9041 Ld = DAG.getLoad(
9042 MVT::f64, dl, Chain, FIdx,
9044 Chain = Ld.getValue(1);
9045 }
9046
9047 // FCFID it and return it.
9048 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9049 if (IsStrict)
9050 Chain = FP.getValue(1);
9051 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9052 if (IsStrict)
9054 DAG.getVTList(MVT::f32, MVT::Other),
9055 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9056 else
9057 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9058 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9059 }
9060 return FP;
9061}
9062
// Lower ISD::SET_ROUNDING: change the dynamic rounding mode held in FPSCR
// bits 30:31. The incoming operand uses LLVM's rounding-mode encoding; it is
// remapped to the Power hardware encoding with x ^ (~(x >> 1) & 1).
// Returns the chain of the final FPSCR-writing node.
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDValue Chain = Op.getOperand(0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    // The requested mode in the hardware's 2-bit encoding.
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    // ISA 3.0 can set the rounding field in one instruction (mffscrni).
    if (Subtarget.isISA3_0())
      return SDValue(
          DAG.getMachineNode(
              PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
              {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
          1);
    // Otherwise set FPSCR bits 30 and 31 individually with mtfsb0/mtfsb1.
    SDNode *SetHi = DAG.getMachineNode(
        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
  // Mask the requested mode to its low two bits first.
  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
                                DAG.getConstant(3, Dl, MVT::i32));
  SDValue DstFlag = DAG.getNode(
      ISD::XOR, Dl, MVT::i32, SrcFlag,
      DAG.getNode(ISD::AND, Dl, MVT::i32,
                  DAG.getNOT(Dl,
                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
                             MVT::i32),
                  One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
    Chain = MFFS.getValue(1);
  }
  // NewFPSCR is the f64-typed value handed to mffscrn/mtfsf below.
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      // mffscrn only reads the rounding field, so the extended flag alone
      // is a sufficient image.
      NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          PPC::RLDIMI, Dl, MVT::i64,
          {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
           DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
           DAG.getTargetConstant(0, Dl, MVT::i32),
           DAG.getTargetConstant(62, Dl, MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    // Addr points at the word holding FPSCR bits 32..63 (the rounding
    // field): offset 4 on big-endian, offset 0 on little-endian.
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
                                     DAG.getConstant(4, Dl, PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
    } else {
      // Store the current FPSCR image, patch its low word with rlwimi,
      // then store the patched word back.
      Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
      Chain = Tmp.getValue(1);
      Tmp = SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, Dl, MVT::i32,
                        {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
                         DAG.getTargetConstant(30, Dl, MVT::i32),
                         DAG.getTargetConstant(31, Dl, MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
    }
    // Reload the full image as f64 for the FPSCR write below.
    NewFPSCR =
        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
    Chain = NewFPSCR.getValue(1);
  }
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
                                      {NewFPSCR, Chain}),
                   1);
  // Pre-ISA 3.0: write the whole updated FPSCR image back via mtfsf.
  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
  SDNode *MTFSF = DAG.getMachineNode(
      PPC::MTFSF, Dl, MVT::Other,
      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9159
// Lower ISD::GET_ROUNDING: read the current rounding mode from FPSCR and
// translate it to LLVM's encoding. Returns {mode, chain}.
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
    // With legal i64, extract the control word via a bitcast + truncate —
    // no stack traffic needed.
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary
  // CWD1 = FPSCR & 3; CWD2 = ((FPSCR ^ 3) & 3) >> 1  (== (~FPSCR & 3) >> 1).
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Resize the i32 result to the requested result type VT.
  RetVal =
              dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
9231
9232SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9233 EVT VT = Op.getValueType();
9234 unsigned BitWidth = VT.getSizeInBits();
9235 SDLoc dl(Op);
9236 assert(Op.getNumOperands() == 3 &&
9237 VT == Op.getOperand(1).getValueType() &&
9238 "Unexpected SHL!");
9239
9240 // Expand into a bunch of logical ops. Note that these ops
9241 // depend on the PPC behavior for oversized shift amounts.
9242 SDValue Lo = Op.getOperand(0);
9243 SDValue Hi = Op.getOperand(1);
9244 SDValue Amt = Op.getOperand(2);
9245 EVT AmtVT = Amt.getValueType();
9246
9247 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9248 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9249 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9250 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9251 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9252 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9253 DAG.getConstant(-BitWidth, dl, AmtVT));
9254 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9255 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9256 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9257 SDValue OutOps[] = { OutLo, OutHi };
9258 return DAG.getMergeValues(OutOps, dl);
9259}
9260
9261SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9262 EVT VT = Op.getValueType();
9263 SDLoc dl(Op);
9264 unsigned BitWidth = VT.getSizeInBits();
9265 assert(Op.getNumOperands() == 3 &&
9266 VT == Op.getOperand(1).getValueType() &&
9267 "Unexpected SRL!");
9268
9269 // Expand into a bunch of logical ops. Note that these ops
9270 // depend on the PPC behavior for oversized shift amounts.
9271 SDValue Lo = Op.getOperand(0);
9272 SDValue Hi = Op.getOperand(1);
9273 SDValue Amt = Op.getOperand(2);
9274 EVT AmtVT = Amt.getValueType();
9275
9276 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9277 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9278 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9279 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9280 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9281 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9282 DAG.getConstant(-BitWidth, dl, AmtVT));
9283 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9284 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9285 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9286 SDValue OutOps[] = { OutLo, OutHi };
9287 return DAG.getMergeValues(OutOps, dl);
9288}
9289
9290SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9291 SDLoc dl(Op);
9292 EVT VT = Op.getValueType();
9293 unsigned BitWidth = VT.getSizeInBits();
9294 assert(Op.getNumOperands() == 3 &&
9295 VT == Op.getOperand(1).getValueType() &&
9296 "Unexpected SRA!");
9297
9298 // Expand into a bunch of logical ops, followed by a select_cc.
9299 SDValue Lo = Op.getOperand(0);
9300 SDValue Hi = Op.getOperand(1);
9301 SDValue Amt = Op.getOperand(2);
9302 EVT AmtVT = Amt.getValueType();
9303
9304 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9305 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9306 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9307 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9308 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9309 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9310 DAG.getConstant(-BitWidth, dl, AmtVT));
9311 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9312 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9313 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9314 Tmp4, Tmp6, ISD::SETLE);
9315 SDValue OutOps[] = { OutLo, OutHi };
9316 return DAG.getMergeValues(OutOps, dl);
9317}
9318
9319SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9320 SelectionDAG &DAG) const {
9321 SDLoc dl(Op);
9322 EVT VT = Op.getValueType();
9323 unsigned BitWidth = VT.getSizeInBits();
9324
9325 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9326 SDValue X = Op.getOperand(0);
9327 SDValue Y = Op.getOperand(1);
9328 SDValue Z = Op.getOperand(2);
9329 EVT AmtVT = Z.getValueType();
9330
9331 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9332 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9333 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9334 // on PowerPC shift by BW being well defined.
9335 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9336 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9337 SDValue SubZ =
9338 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9339 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9340 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9341 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9342}
9343
9344//===----------------------------------------------------------------------===//
9345// Vector related lowering.
9346//
9347
9348/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9349/// element size of SplatSize. Cast the result to VT.
9350static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9351 SelectionDAG &DAG, const SDLoc &dl) {
9352 static const MVT VTys[] = { // canonical VT to use for each size.
9353 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9354 };
9355
9356 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9357
9358 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9359 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9360 SplatSize = 1;
9361 Val = 0xFF;
9362 }
9363
9364 EVT CanonicalVT = VTys[SplatSize-1];
9365
9366 // Build a canonical splat for this value.
9367 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9368}
9369
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
/// When DestVT is MVT::Other, the result type defaults to Op's value type.
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}
9378
9379/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9380/// specified intrinsic ID.
9381static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9382 SelectionDAG &DAG, const SDLoc &dl,
9383 EVT DestVT = MVT::Other) {
9384 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9385 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9386 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9387}
9388
9389/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9390/// specified intrinsic ID.
9391static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9392 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9393 EVT DestVT = MVT::Other) {
9394 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9395 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9396 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9397}
9398
9399/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9400/// amount. The result has the specified value type.
9401static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9402 SelectionDAG &DAG, const SDLoc &dl) {
9403 // Force LHS/RHS to be the right type.
9404 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9405 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9406
9407 int Ops[16];
9408 for (unsigned i = 0; i != 16; ++i)
9409 Ops[i] = i + Amt;
9410 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9411 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9412}
9413
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
/// \param HasP8Vector - does this subtarget have Power8 vector support
///        (required for the v4f32 case)?
/// \returns true when the BUILD_VECTOR should be kept because an efficient
///          pattern exists; false when expanding it is preferable.
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
                                             bool HasDirectMove,
                                             bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  // Only element types with direct register support qualify for keeping
  // the BUILD_VECTOR node.
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  // Expand (return false) only in the load-and-splat case; keep otherwise.
  return !(IsSplat && IsLoad);
}
9466
9467// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9468SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9469
9470 SDLoc dl(Op);
9471 SDValue Op0 = Op->getOperand(0);
9472
9473 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9474 (Op.getValueType() != MVT::f128))
9475 return SDValue();
9476
9477 SDValue Lo = Op0.getOperand(0);
9478 SDValue Hi = Op0.getOperand(1);
9479 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9480 return SDValue();
9481
9482 if (!Subtarget.isLittleEndian())
9483 std::swap(Lo, Hi);
9484
9485 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9486}
9487
// Peel any BITCAST and SCALAR_TO_VECTOR-style wrappers off Op; if the value
// underneath is a normal load (per ISD::isNormalLoad), return a pointer to
// that operand, otherwise return nullptr. IsPermuted is set to whether a
// PPCISD::SCALAR_TO_VECTOR_PERMUTED wrapper was seen, i.e. the loaded value
// sits in a permuted lane.
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
  const SDValue *InputLoad = &Op;
  // Look through any number of bitcasts first.
  while (InputLoad->getOpcode() == ISD::BITCAST)
    InputLoad = &InputLoad->getOperand(0);
  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
    InputLoad = &InputLoad->getOperand(0);
  }
  if (InputLoad->getOpcode() != ISD::LOAD)
    return nullptr;
  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}
9502
// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and the
// resulting number is not a denormal number. Return true if successful.
// On failure ArgAPFloat is left unmodified; only the local copy is converted.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true; // Pessimistic default; set by the convert call below.
                             &LosesInfo);
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}
9516
// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a denormal
// number. Return true if successful. On failure ArgAPInt is left unmodified.
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  // Delegate the lossless-and-non-denormal check to the APFloat overload.
  bool Success = convertToNonDenormSingle(APFloatDp);
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt(); // Narrowed value, re-encoded as bits.
  return Success;
}
9530
// Nondestructive check for convertToNonDenormSingle: reports whether the
// conversion to single precision would lose information, without modifying
// the argument (only a local copy is converted).
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true; // Pessimistic default; set by the convert call below.
                             &LosesInfo);

  return (!LosesInfo && !APFloatToConvert.isDenormal());
}
9542
9543static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9544 unsigned &Opcode) {
9545 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9546 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9547 return false;
9548
9549 EVT Ty = Op->getValueType(0);
9550 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9551 // as we cannot handle extending loads for these types.
9552 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9553 ISD::isNON_EXTLoad(InputNode))
9554 return true;
9555
9556 EVT MemVT = InputNode->getMemoryVT();
9557 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9558 // memory VT is the same vector element VT type.
9559 // The loads feeding into the v8i16 and v16i8 types will be extending because
9560 // scalar i8/i16 are not legal types.
9561 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9562 (MemVT == Ty.getVectorElementType()))
9563 return true;
9564
9565 if (Ty == MVT::v2i64) {
9566 // Check the extend type, when the input type is i32, and the output vector
9567 // type is v2i64.
9568 if (MemVT == MVT::i32) {
9569 if (ISD::isZEXTLoad(InputNode))
9570 Opcode = PPCISD::ZEXT_LD_SPLAT;
9571 if (ISD::isSEXTLoad(InputNode))
9572 Opcode = PPCISD::SEXT_LD_SPLAT;
9573 }
9574 return true;
9575 }
9576 return false;
9577}
9578
9579// If this is a case we can't handle, return null and let the default
9580// expansion code take care of it. If we CAN select this case, and if it
9581// selects to a single instruction, return Op. Otherwise, if we can codegen
9582// this case more efficiently than a constant pool load, lower it to the
9583// sequence of ops that should be used.
9584SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9585 SelectionDAG &DAG) const {
9586 SDLoc dl(Op);
9588 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9589
9590 // Check if this is a splat of a constant value.
9591 APInt APSplatBits, APSplatUndef;
9592 unsigned SplatBitSize;
9593 bool HasAnyUndefs;
9594 bool BVNIsConstantSplat =
9595 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9596 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9597
9598 // If it is a splat of a double, check if we can shrink it to a 32 bit
9599 // non-denormal float which when converted back to double gives us the same
9600 // double. This is to exploit the XXSPLTIDP instruction.
9601 // If we lose precision, we use XXSPLTI32DX.
9602 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9603 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9604 // Check the type first to short-circuit so we don't modify APSplatBits if
9605 // this block isn't executed.
9606 if ((Op->getValueType(0) == MVT::v2f64) &&
9607 convertToNonDenormSingle(APSplatBits)) {
9608 SDValue SplatNode = DAG.getNode(
9609 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9610 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9611 return DAG.getBitcast(Op.getValueType(), SplatNode);
9612 } else {
9613 // We may lose precision, so we have to use XXSPLTI32DX.
9614
9615 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9616 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9617 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9618
9619 if (!Hi || !Lo)
9620 // If either load is 0, then we should generate XXLXOR to set to 0.
9621 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9622
9623 if (Hi)
9624 SplatNode = DAG.getNode(
9625 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9626 DAG.getTargetConstant(0, dl, MVT::i32),
9627 DAG.getTargetConstant(Hi, dl, MVT::i32));
9628
9629 if (Lo)
9630 SplatNode =
9631 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9632 DAG.getTargetConstant(1, dl, MVT::i32),
9633 DAG.getTargetConstant(Lo, dl, MVT::i32));
9634
9635 return DAG.getBitcast(Op.getValueType(), SplatNode);
9636 }
9637 }
9638
9639 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9640 unsigned NewOpcode = PPCISD::LD_SPLAT;
9641
9642 // Handle load-and-splat patterns as we have instructions that will do this
9643 // in one go.
9644 if (DAG.isSplatValue(Op, true) &&
9645 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9646 const SDValue *InputLoad = &Op.getOperand(0);
9647 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9648
9649 // If the input load is an extending load, it will be an i32 -> i64
9650 // extending load and isValidSplatLoad() will update NewOpcode.
9651 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9652 unsigned ElementSize =
9653 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9654
9655 assert(((ElementSize == 2 * MemorySize)
9656 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9657 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9658 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9659 "Unmatched element size and opcode!\n");
9660
9661 // Checking for a single use of this load, we have to check for vector
9662 // width (128 bits) / ElementSize uses (since each operand of the
9663 // BUILD_VECTOR is a separate use of the value.
9664 unsigned NumUsesOfInputLD = 128 / ElementSize;
9665 for (SDValue BVInOp : Op->ops())
9666 if (BVInOp.isUndef())
9667 NumUsesOfInputLD--;
9668
9669 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9670 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9671 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9672 // 15", but function IsValidSplatLoad() now will only return true when
9673 // the data at index 0 is not nullptr. So we will not get into trouble for
9674 // these cases.
9675 //
9676 // case 1 - lfiwzx/lfiwax
9677 // 1.1: load result is i32 and is sign/zero extend to i64;
9678 // 1.2: build a v2i64 vector type with above loaded value;
9679 // 1.3: the vector has only one value at index 0, others are all undef;
9680 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9681 if (NumUsesOfInputLD == 1 &&
9682 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9683 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9684 Subtarget.hasLFIWAX()))
9685 return SDValue();
9686
9687 // case 2 - lxvr[hb]x
9688 // 2.1: load result is at most i16;
9689 // 2.2: build a vector with above loaded value;
9690 // 2.3: the vector has only one value at index 0, others are all undef;
9691 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9692 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9693 Subtarget.isISA3_1() && ElementSize <= 16)
9694 return SDValue();
9695
9696 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9697 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9698 Subtarget.hasVSX()) {
9699 SDValue Ops[] = {
9700 LD->getChain(), // Chain
9701 LD->getBasePtr(), // Ptr
9702 DAG.getValueType(Op.getValueType()) // VT
9703 };
9704 SDValue LdSplt = DAG.getMemIntrinsicNode(
9705 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9706 LD->getMemoryVT(), LD->getMemOperand());
9707 // Replace all uses of the output chain of the original load with the
9708 // output chain of the new load.
9709 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9710 LdSplt.getValue(1));
9711 return LdSplt;
9712 }
9713 }
9714
9715 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9716 // 32-bits can be lowered to VSX instructions under certain conditions.
9717 // Without VSX, there is no pattern more efficient than expanding the node.
9718 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9719 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9720 Subtarget.hasP8Vector()))
9721 return Op;
9722 return SDValue();
9723 }
9724
9725 uint64_t SplatBits = APSplatBits.getZExtValue();
9726 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9727 unsigned SplatSize = SplatBitSize / 8;
9728
9729 // First, handle single instruction cases.
9730
9731 // All zeros?
9732 if (SplatBits == 0) {
9733 // Canonicalize all zero vectors to be v4i32.
9734 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9735 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9736 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9737 }
9738 return Op;
9739 }
9740
9741 // We have XXSPLTIW for constant splats four bytes wide.
9742 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9743 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9744 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9745 // turned into a 4-byte splat of 0xABABABAB.
9746 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9747 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9748 Op.getValueType(), DAG, dl);
9749
9750 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9751 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9752 dl);
9753
9754 // We have XXSPLTIB for constant splats one byte wide.
9755 if (Subtarget.hasP9Vector() && SplatSize == 1)
9756 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9757 dl);
9758
9759 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9760 int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
9761 if (SextVal >= -16 && SextVal <= 15)
9762 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9763 dl);
9764
9765 // Two instruction sequences.
9766
9767 // If this value is in the range [-32,30] and is even, use:
9768 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9769 // If this value is in the range [17,31] and is odd, use:
9770 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9771 // If this value is in the range [-31,-17] and is odd, use:
9772 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9773 // Note the last two are three-instruction sequences.
9774 if (SextVal >= -32 && SextVal <= 31) {
9775 // To avoid having these optimizations undone by constant folding,
9776 // we convert to a pseudo that will be expanded later into one of
9777 // the above forms.
9778 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9779 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9780 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9781 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9782 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9783 if (VT == Op.getValueType())
9784 return RetVal;
9785 else
9786 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9787 }
9788
9789 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9790 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9791 // for fneg/fabs.
9792 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9793 // Make -1 and vspltisw -1:
9794 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9795
9796 // Make the VSLW intrinsic, computing 0x8000_0000.
9797 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9798 OnesV, DAG, dl);
9799
9800 // xor by OnesV to invert it.
9801 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9802 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9803 }
9804
9805 // Check to see if this is a wide variety of vsplti*, binop self cases.
9806 static const signed char SplatCsts[] = {
9807 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9808 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9809 };
9810
9811 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9812 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9813 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9814 int i = SplatCsts[idx];
9815
9816 // Figure out what shift amount will be used by altivec if shifted by i in
9817 // this splat size.
9818 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9819
9820 // vsplti + shl self.
9821 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9822 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9823 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9824 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9825 Intrinsic::ppc_altivec_vslw
9826 };
9827 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9828 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9829 }
9830
9831 // vsplti + srl self.
9832 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9833 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9834 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9835 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9836 Intrinsic::ppc_altivec_vsrw
9837 };
9838 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9839 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9840 }
9841
9842 // vsplti + rol self.
9843 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9844 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9845 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9846 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9847 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9848 Intrinsic::ppc_altivec_vrlw
9849 };
9850 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9851 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9852 }
9853
9854 // t = vsplti c, result = vsldoi t, t, 1
9855 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9856 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9857 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9858 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9859 }
9860 // t = vsplti c, result = vsldoi t, t, 2
9861 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9862 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9863 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9864 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9865 }
9866 // t = vsplti c, result = vsldoi t, t, 3
9867 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9868 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9869 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9870 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9871 }
9872 }
9873
9874 return SDValue();
9875}
9876
9877/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9878/// the specified operations to build the shuffle.
9879static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9880 SDValue RHS, SelectionDAG &DAG,
9881 const SDLoc &dl) {
9882 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9883 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9884 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9885
9886 enum {
9887 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9888 OP_VMRGHW,
9889 OP_VMRGLW,
9890 OP_VSPLTISW0,
9891 OP_VSPLTISW1,
9892 OP_VSPLTISW2,
9893 OP_VSPLTISW3,
9894 OP_VSLDOI4,
9895 OP_VSLDOI8,
9896 OP_VSLDOI12
9897 };
9898
9899 if (OpNum == OP_COPY) {
9900 if (LHSID == (1*9+2)*9+3) return LHS;
9901 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9902 return RHS;
9903 }
9904
9905 SDValue OpLHS, OpRHS;
9906 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9907 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9908
9909 int ShufIdxs[16];
9910 switch (OpNum) {
9911 default: llvm_unreachable("Unknown i32 permute!");
9912 case OP_VMRGHW:
9913 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9914 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9915 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9916 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9917 break;
9918 case OP_VMRGLW:
9919 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9920 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9921 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9922 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9923 break;
9924 case OP_VSPLTISW0:
9925 for (unsigned i = 0; i != 16; ++i)
9926 ShufIdxs[i] = (i&3)+0;
9927 break;
9928 case OP_VSPLTISW1:
9929 for (unsigned i = 0; i != 16; ++i)
9930 ShufIdxs[i] = (i&3)+4;
9931 break;
9932 case OP_VSPLTISW2:
9933 for (unsigned i = 0; i != 16; ++i)
9934 ShufIdxs[i] = (i&3)+8;
9935 break;
9936 case OP_VSPLTISW3:
9937 for (unsigned i = 0; i != 16; ++i)
9938 ShufIdxs[i] = (i&3)+12;
9939 break;
9940 case OP_VSLDOI4:
9941 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9942 case OP_VSLDOI8:
9943 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9944 case OP_VSLDOI12:
9945 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9946 }
9947 EVT VT = OpLHS.getValueType();
9948 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9949 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9950 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9951 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9952}
9953
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// The shuffle must be equivalent to inserting exactly one byte from one
/// operand into the other (or from a vector into itself when the second
/// operand is undef), with every remaining byte kept in its original order.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  // Indexed by the source byte position; VINSERTB always reads from byte 7 of
  // the source, so a VECSHL by this amount moves the desired byte there.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      // VINSERTB byte numbering is big-endian, so mirror the index on LE.
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  // With an undef second operand the insert reads from the same vector.
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
10054
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// The shuffle must be a half-word shuffle equivalent to inserting exactly
/// one half-word from one operand into the other (or from a vector into
/// itself when the second operand is undef), with every remaining half-word
/// kept in its original order.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  // Indexed by source half-word position; VINSERTH always reads half-word 3
  // of the source, so a VECSHL by this amount moves the desired one there.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Nibble-packed reference orders: elements 0-7 of the first operand and
  // elements 8-15 (0x8-0xF) of the second.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask covering every nibble except the one at position i.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // VINSERTH byte numbering is big-endian; mirror the offset on LE.
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        // Inserted element comes from V1, so the operands must be swapped.
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  // With an undef second operand the insert reads from the same vector.
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
10166
10167/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10168/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10169/// return the default SDValue.
10170SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10171 SelectionDAG &DAG) const {
10172 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10173 // to v16i8. Peek through the bitcasts to get the actual operands.
10174 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10175 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10176
10177 auto ShuffleMask = SVN->getMask();
10178 SDValue VecShuffle(SVN, 0);
10179 SDLoc DL(SVN);
10180
10181 // Check that we have a four byte shuffle.
10182 if (!isNByteElemShuffleMask(SVN, 4, 1))
10183 return SDValue();
10184
10185 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10186 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10187 std::swap(LHS, RHS);
10189 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10190 if (!CommutedSV)
10191 return SDValue();
10192 ShuffleMask = CommutedSV->getMask();
10193 }
10194
10195 // Ensure that the RHS is a vector of constants.
10196 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10197 if (!BVN)
10198 return SDValue();
10199
10200 // Check if RHS is a splat of 4-bytes (or smaller).
10201 APInt APSplatValue, APSplatUndef;
10202 unsigned SplatBitSize;
10203 bool HasAnyUndefs;
10204 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10205 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10206 SplatBitSize > 32)
10207 return SDValue();
10208
10209 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10210 // The instruction splats a constant C into two words of the source vector
10211 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10212 // Thus we check that the shuffle mask is the equivalent of
10213 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10214 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10215 // within each word are consecutive, so we only need to check the first byte.
10216 SDValue Index;
10217 bool IsLE = Subtarget.isLittleEndian();
10218 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10219 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10220 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10221 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10222 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10223 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10224 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10225 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10226 else
10227 return SDValue();
10228
10229 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10230 // for XXSPLTI32DX.
10231 unsigned SplatVal = APSplatValue.getZExtValue();
10232 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10233 SplatVal |= (SplatVal << SplatBitSize);
10234
10235 SDValue SplatNode = DAG.getNode(
10236 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10237 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10238 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10239}
10240
10241/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10242/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10243/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10244/// i.e (or (shl x, C1), (srl x, 128-C1)).
10245SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10246 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10247 assert(Op.getValueType() == MVT::v1i128 &&
10248 "Only set v1i128 as custom, other type shouldn't reach here!");
10249 SDLoc dl(Op);
10250 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10251 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10252 unsigned SHLAmt = N1.getConstantOperandVal(0);
10253 if (SHLAmt % 8 == 0) {
10254 std::array<int, 16> Mask;
10255 std::iota(Mask.begin(), Mask.end(), 0);
10256 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10257 if (SDValue Shuffle =
10258 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10259 DAG.getUNDEF(MVT::v16i8), Mask))
10260 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10261 }
10262 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10263 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10264 DAG.getConstant(SHLAmt, dl, MVT::i32));
10265 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10266 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10267 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10268 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10269}
10270
10271/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10272/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10273/// return the code it can be lowered into. Worst case, it can always be
10274/// lowered into a vperm.
10275SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10276 SelectionDAG &DAG) const {
10277 SDLoc dl(Op);
10278 SDValue V1 = Op.getOperand(0);
10279 SDValue V2 = Op.getOperand(1);
10281
10282 // Any nodes that were combined in the target-independent combiner prior
10283 // to vector legalization will not be sent to the target combine. Try to
10284 // combine it here.
10285 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10286 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10287 return NewShuffle;
10288 Op = NewShuffle;
10290 V1 = Op.getOperand(0);
10291 V2 = Op.getOperand(1);
10292 }
10293 EVT VT = Op.getValueType();
10294 bool isLittleEndian = Subtarget.isLittleEndian();
10295
10296 unsigned ShiftElts, InsertAtByte;
10297 bool Swap = false;
10298
10299 // If this is a load-and-splat, we can do that with a single instruction
10300 // in some cases. However if the load has multiple uses, we don't want to
10301 // combine it because that will just produce multiple loads.
10302 bool IsPermutedLoad = false;
10303 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10304 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10305 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10306 InputLoad->hasOneUse()) {
10307 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10308 int SplatIdx =
10309 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10310
10311 // The splat index for permuted loads will be in the left half of the vector
10312 // which is strictly wider than the loaded value by 8 bytes. So we need to
10313 // adjust the splat index to point to the correct address in memory.
10314 if (IsPermutedLoad) {
10315 assert((isLittleEndian || IsFourByte) &&
10316 "Unexpected size for permuted load on big endian target");
10317 SplatIdx += IsFourByte ? 2 : 1;
10318 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10319 "Splat of a value outside of the loaded memory");
10320 }
10321
10322 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10323 // For 4-byte load-and-splat, we need Power9.
10324 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10325 uint64_t Offset = 0;
10326 if (IsFourByte)
10327 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10328 else
10329 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10330
10331 // If the width of the load is the same as the width of the splat,
10332 // loading with an offset would load the wrong memory.
10333 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10334 Offset = 0;
10335
10336 SDValue BasePtr = LD->getBasePtr();
10337 if (Offset != 0)
10339 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10340 SDValue Ops[] = {
10341 LD->getChain(), // Chain
10342 BasePtr, // BasePtr
10343 DAG.getValueType(Op.getValueType()) // VT
10344 };
10345 SDVTList VTL =
10346 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10347 SDValue LdSplt =
10349 Ops, LD->getMemoryVT(), LD->getMemOperand());
10350 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10351 if (LdSplt.getValueType() != SVOp->getValueType(0))
10352 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10353 return LdSplt;
10354 }
10355 }
10356
10357 // All v2i64 and v2f64 shuffles are legal
10358 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10359 return Op;
10360
10361 if (Subtarget.hasP9Vector() &&
10362 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10363 isLittleEndian)) {
10364 if (V2.isUndef())
10365 V2 = V1;
10366 else if (Swap)
10367 std::swap(V1, V2);
10368 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10369 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10370 if (ShiftElts) {
10371 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10372 DAG.getConstant(ShiftElts, dl, MVT::i32));
10373 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10374 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10375 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10376 }
10377 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10378 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10379 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10380 }
10381
10382 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10383 SDValue SplatInsertNode;
10384 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10385 return SplatInsertNode;
10386 }
10387
10388 if (Subtarget.hasP9Altivec()) {
10389 SDValue NewISDNode;
10390 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10391 return NewISDNode;
10392
10393 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10394 return NewISDNode;
10395 }
10396
10397 if (Subtarget.hasVSX() &&
10398 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10399 if (Swap)
10400 std::swap(V1, V2);
10401 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10402 SDValue Conv2 =
10403 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10404
10405 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10406 DAG.getConstant(ShiftElts, dl, MVT::i32));
10407 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10408 }
10409
10410 if (Subtarget.hasVSX() &&
10411 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10412 if (Swap)
10413 std::swap(V1, V2);
10414 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10415 SDValue Conv2 =
10416 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10417
10418 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10419 DAG.getConstant(ShiftElts, dl, MVT::i32));
10420 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10421 }
10422
10423 if (Subtarget.hasP9Vector()) {
10424 if (PPC::isXXBRHShuffleMask(SVOp)) {
10425 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10426 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10427 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10428 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10429 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10430 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10431 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10432 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10433 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10434 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10435 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10436 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10437 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10438 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10439 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10440 }
10441 }
10442
10443 if (Subtarget.hasVSX()) {
10444 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10445 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10446
10447 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10448 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10449 DAG.getConstant(SplatIdx, dl, MVT::i32));
10450 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10451 }
10452
10453 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10454 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10455 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10456 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10457 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10458 }
10459 }
10460
10461 // Cases that are handled by instructions that take permute immediates
10462 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10463 // selected by the instruction selector.
10464 if (V2.isUndef()) {
10465 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10466 PPC::isSplatShuffleMask(SVOp, 2) ||
10467 PPC::isSplatShuffleMask(SVOp, 4) ||
10468 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10469 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10470 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10471 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10472 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10473 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10474 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10475 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10476 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10477 (Subtarget.hasP8Altivec() && (
10478 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10479 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10480 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10481 return Op;
10482 }
10483 }
10484
10485 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10486 // and produce a fixed permutation. If any of these match, do not lower to
10487 // VPERM.
10488 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10489 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10490 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10491 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10492 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10493 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10494 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10495 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10496 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10497 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10498 (Subtarget.hasP8Altivec() && (
10499 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10500 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10501 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10502 return Op;
10503
10504 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10505 // perfect shuffle table to emit an optimal matching sequence.
10506 ArrayRef<int> PermMask = SVOp->getMask();
10507
10508 if (!DisablePerfectShuffle && !isLittleEndian) {
10509 unsigned PFIndexes[4];
10510 bool isFourElementShuffle = true;
10511 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10512 ++i) { // Element number
10513 unsigned EltNo = 8; // Start out undef.
10514 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10515 if (PermMask[i * 4 + j] < 0)
10516 continue; // Undef, ignore it.
10517
10518 unsigned ByteSource = PermMask[i * 4 + j];
10519 if ((ByteSource & 3) != j) {
10520 isFourElementShuffle = false;
10521 break;
10522 }
10523
10524 if (EltNo == 8) {
10525 EltNo = ByteSource / 4;
10526 } else if (EltNo != ByteSource / 4) {
10527 isFourElementShuffle = false;
10528 break;
10529 }
10530 }
10531 PFIndexes[i] = EltNo;
10532 }
10533
10534 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10535 // perfect shuffle vector to determine if it is cost effective to do this as
10536 // discrete instructions, or whether we should use a vperm.
10537 // For now, we skip this for little endian until such time as we have a
10538 // little-endian perfect shuffle table.
10539 if (isFourElementShuffle) {
10540 // Compute the index in the perfect shuffle table.
10541 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10542 PFIndexes[2] * 9 + PFIndexes[3];
10543
10544 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10545 unsigned Cost = (PFEntry >> 30);
10546
10547 // Determining when to avoid vperm is tricky. Many things affect the cost
10548 // of vperm, particularly how many times the perm mask needs to be
10549 // computed. For example, if the perm mask can be hoisted out of a loop or
10550 // is already used (perhaps because there are multiple permutes with the
10551 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10552 // permute mask out of the loop requires an extra register.
10553 //
10554 // As a compromise, we only emit discrete instructions if the shuffle can
10555 // be generated in 3 or fewer operations. When we have loop information
10556 // available, if this block is within a loop, we should avoid using vperm
10557 // for 3-operation perms and use a constant pool load instead.
10558 if (Cost < 3)
10559 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10560 }
10561 }
10562
10563 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10564 // vector that will get spilled to the constant pool.
10565 if (V2.isUndef()) V2 = V1;
10566
10567 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10568}
10569
10570SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10571 ArrayRef<int> PermMask, EVT VT,
10572 SDValue V1, SDValue V2) const {
10573 unsigned Opcode = PPCISD::VPERM;
10574 EVT ValType = V1.getValueType();
10575 SDLoc dl(Op);
10576 bool NeedSwap = false;
10577 bool isLittleEndian = Subtarget.isLittleEndian();
10578 bool isPPC64 = Subtarget.isPPC64();
10579
10580 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10581 (V1->hasOneUse() || V2->hasOneUse())) {
10582 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10583 "XXPERM instead\n");
10584 Opcode = PPCISD::XXPERM;
10585
10586 // The second input to XXPERM is also an output so if the second input has
10587 // multiple uses then copying is necessary, as a result we want the
10588 // single-use operand to be used as the second input to prevent copying.
10589 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10590 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10591 std::swap(V1, V2);
10592 NeedSwap = !NeedSwap;
10593 }
10594 }
10595
10596 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10597 // that it is in input element units, not in bytes. Convert now.
10598
10599 // For little endian, the order of the input vectors is reversed, and
10600 // the permutation mask is complemented with respect to 31. This is
10601 // necessary to produce proper semantics with the big-endian-based vperm
10602 // instruction.
10603 EVT EltVT = V1.getValueType().getVectorElementType();
10604 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10605
10606 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10607 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10608
10609 /*
10610 Vectors will be appended like so: [ V1 | v2 ]
10611 XXSWAPD on V1:
10612 [ A | B | C | D ] -> [ C | D | A | B ]
10613 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10614 i.e. index of A, B += 8, and index of C, D -= 8.
10615 XXSWAPD on V2:
10616 [ E | F | G | H ] -> [ G | H | E | F ]
10617 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10618 i.e. index of E, F += 8, index of G, H -= 8
10619 Swap V1 and V2:
10620 [ V1 | V2 ] -> [ V2 | V1 ]
10621 0-15 16-31 0-15 16-31
10622 i.e. index of V1 += 16, index of V2 -= 16
10623 */
10624
10625 SmallVector<SDValue, 16> ResultMask;
10626 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10627 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10628
10629 if (V1HasXXSWAPD) {
10630 if (SrcElt < 8)
10631 SrcElt += 8;
10632 else if (SrcElt < 16)
10633 SrcElt -= 8;
10634 }
10635 if (V2HasXXSWAPD) {
10636 if (SrcElt > 23)
10637 SrcElt -= 8;
10638 else if (SrcElt > 15)
10639 SrcElt += 8;
10640 }
10641 if (NeedSwap) {
10642 if (SrcElt < 16)
10643 SrcElt += 16;
10644 else
10645 SrcElt -= 16;
10646 }
10647 for (unsigned j = 0; j != BytesPerElement; ++j)
10648 if (isLittleEndian)
10649 ResultMask.push_back(
10650 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10651 else
10652 ResultMask.push_back(
10653 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10654 }
10655
10656 if (V1HasXXSWAPD) {
10657 dl = SDLoc(V1->getOperand(0));
10658 V1 = V1->getOperand(0)->getOperand(1);
10659 }
10660 if (V2HasXXSWAPD) {
10661 dl = SDLoc(V2->getOperand(0));
10662 V2 = V2->getOperand(0)->getOperand(1);
10663 }
10664
10665 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10666 if (ValType != MVT::v2f64)
10667 V1 = DAG.getBitcast(MVT::v2f64, V1);
10668 if (V2.getValueType() != MVT::v2f64)
10669 V2 = DAG.getBitcast(MVT::v2f64, V2);
10670 }
10671
10672 ShufflesHandledWithVPERM++;
10673 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10674 LLVM_DEBUG({
10676 if (Opcode == PPCISD::XXPERM) {
10677 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10678 } else {
10679 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10680 }
10681 SVOp->dump();
10682 dbgs() << "With the following permute control vector:\n";
10683 VPermMask.dump();
10684 });
10685
10686 if (Opcode == PPCISD::XXPERM)
10687 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10688
10689 // Only need to place items backwards in LE,
10690 // the mask was properly calculated.
10691 if (isLittleEndian)
10692 std::swap(V1, V2);
10693
10694 SDValue VPERMNode =
10695 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10696
10697 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10698 return VPERMNode;
10699}
10700
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
///
/// \param Intrin      The INTRINSIC_WO_CHAIN node; operand 0 is the
///                    intrinsic ID.
/// \param CompareOpc  [out] Numeric opcode encoding consumed by the PPC
///                    instruction-selection patterns; set to -1 first.
/// \param isDot       [out] True for the "record"/predicate (_p) forms that
///                    also set CR6.
/// \param Subtarget   Used to reject comparisons the target ISA lacks.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates (the "_p" forms set isDot).
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword compares require P8 Altivec or VSX.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // The vcmpne* family is only available from P9 Altivec on.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword compares (non-record forms) require ISA 3.1 (Power10).
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Quadword predicate compares also require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
10986
10987/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10988/// lower, do it, otherwise return null.
10989SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10990 SelectionDAG &DAG) const {
10991 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10992
10993 SDLoc dl(Op);
10994
10995 switch (IntrinsicID) {
10996 case Intrinsic::thread_pointer:
10997 // Reads the thread pointer register, used for __builtin_thread_pointer.
10998 if (Subtarget.isPPC64())
10999 return DAG.getRegister(PPC::X13, MVT::i64);
11000 return DAG.getRegister(PPC::R2, MVT::i32);
11001
11002 case Intrinsic::ppc_rldimi: {
11003 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11004 SDValue Src = Op.getOperand(1);
11005 APInt Mask = Op.getConstantOperandAPInt(4);
11006 if (Mask.isZero())
11007 return Op.getOperand(2);
11008 if (Mask.isAllOnes())
11009 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11010 uint64_t SH = Op.getConstantOperandVal(3);
11011 unsigned MB = 0, ME = 0;
11012 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11013 report_fatal_error("invalid rldimi mask!");
11014 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11015 if (ME < 63 - SH) {
11016 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11017 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11018 } else if (ME > 63 - SH) {
11019 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11020 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11021 }
11022 return SDValue(
11023 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11024 {Op.getOperand(2), Src,
11025 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11026 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11027 0);
11028 }
11029
11030 case Intrinsic::ppc_rlwimi: {
11031 APInt Mask = Op.getConstantOperandAPInt(4);
11032 if (Mask.isZero())
11033 return Op.getOperand(2);
11034 if (Mask.isAllOnes())
11035 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11036 Op.getOperand(3));
11037 unsigned MB = 0, ME = 0;
11038 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11039 report_fatal_error("invalid rlwimi mask!");
11040 return SDValue(DAG.getMachineNode(
11041 PPC::RLWIMI, dl, MVT::i32,
11042 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11043 DAG.getTargetConstant(MB, dl, MVT::i32),
11044 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11045 0);
11046 }
11047
11048 case Intrinsic::ppc_rlwnm: {
11049 if (Op.getConstantOperandVal(3) == 0)
11050 return DAG.getConstant(0, dl, MVT::i32);
11051 unsigned MB = 0, ME = 0;
11052 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11053 report_fatal_error("invalid rlwnm mask!");
11054 return SDValue(
11055 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11056 {Op.getOperand(1), Op.getOperand(2),
11057 DAG.getTargetConstant(MB, dl, MVT::i32),
11058 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11059 0);
11060 }
11061
11062 case Intrinsic::ppc_mma_disassemble_acc: {
11063 if (Subtarget.isISAFuture()) {
11064 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11065 SDValue WideVec =
11066 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11067 Op.getOperand(1)),
11068 0);
11070 SDValue Value = SDValue(WideVec.getNode(), 0);
11071 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11072
11073 SDValue Extract;
11074 Extract = DAG.getNode(
11075 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11076 Subtarget.isLittleEndian() ? Value2 : Value,
11077 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11078 dl, getPointerTy(DAG.getDataLayout())));
11079 RetOps.push_back(Extract);
11080 Extract = DAG.getNode(
11081 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11082 Subtarget.isLittleEndian() ? Value2 : Value,
11083 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11084 dl, getPointerTy(DAG.getDataLayout())));
11085 RetOps.push_back(Extract);
11086 Extract = DAG.getNode(
11087 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11088 Subtarget.isLittleEndian() ? Value : Value2,
11089 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11090 dl, getPointerTy(DAG.getDataLayout())));
11091 RetOps.push_back(Extract);
11092 Extract = DAG.getNode(
11093 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11094 Subtarget.isLittleEndian() ? Value : Value2,
11095 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11096 dl, getPointerTy(DAG.getDataLayout())));
11097 RetOps.push_back(Extract);
11098 return DAG.getMergeValues(RetOps, dl);
11099 }
11100 [[fallthrough]];
11101 }
11102 case Intrinsic::ppc_vsx_disassemble_pair: {
11103 int NumVecs = 2;
11104 SDValue WideVec = Op.getOperand(1);
11105 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11106 NumVecs = 4;
11107 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11108 }
11110 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11111 SDValue Extract = DAG.getNode(
11112 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11113 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11114 : VecNo,
11115 dl, getPointerTy(DAG.getDataLayout())));
11116 RetOps.push_back(Extract);
11117 }
11118 return DAG.getMergeValues(RetOps, dl);
11119 }
11120
11121 case Intrinsic::ppc_mma_xxmfacc:
11122 case Intrinsic::ppc_mma_xxmtacc: {
11123 // Allow pre-isa-future subtargets to lower as normal.
11124 if (!Subtarget.isISAFuture())
11125 return SDValue();
11126 // The intrinsics for xxmtacc and xxmfacc take one argument of
11127 // type v512i1, for future cpu the corresponding wacc instruction
11128 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11129 // the need to produce the xxm[t|f]acc.
11130 SDValue WideVec = Op.getOperand(1);
11131 DAG.ReplaceAllUsesWith(Op, WideVec);
11132 return SDValue();
11133 }
11134
11135 case Intrinsic::ppc_unpack_longdouble: {
11136 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11137 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11138 "Argument of long double unpack must be 0 or 1!");
11139 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11140 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11141 Idx->getValueType(0)));
11142 }
11143
11144 case Intrinsic::ppc_compare_exp_lt:
11145 case Intrinsic::ppc_compare_exp_gt:
11146 case Intrinsic::ppc_compare_exp_eq:
11147 case Intrinsic::ppc_compare_exp_uo: {
11148 unsigned Pred;
11149 switch (IntrinsicID) {
11150 case Intrinsic::ppc_compare_exp_lt:
11151 Pred = PPC::PRED_LT;
11152 break;
11153 case Intrinsic::ppc_compare_exp_gt:
11154 Pred = PPC::PRED_GT;
11155 break;
11156 case Intrinsic::ppc_compare_exp_eq:
11157 Pred = PPC::PRED_EQ;
11158 break;
11159 case Intrinsic::ppc_compare_exp_uo:
11160 Pred = PPC::PRED_UN;
11161 break;
11162 }
11163 return SDValue(
11164 DAG.getMachineNode(
11165 PPC::SELECT_CC_I4, dl, MVT::i32,
11166 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11167 Op.getOperand(1), Op.getOperand(2)),
11168 0),
11169 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11170 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11171 0);
11172 }
11173 case Intrinsic::ppc_test_data_class: {
11174 EVT OpVT = Op.getOperand(1).getValueType();
11175 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11176 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11177 : PPC::XSTSTDCSP);
11178 return SDValue(
11179 DAG.getMachineNode(
11180 PPC::SELECT_CC_I4, dl, MVT::i32,
11181 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11182 Op.getOperand(1)),
11183 0),
11184 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11185 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11186 0);
11187 }
11188 case Intrinsic::ppc_fnmsub: {
11189 EVT VT = Op.getOperand(1).getValueType();
11190 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11191 return DAG.getNode(
11192 ISD::FNEG, dl, VT,
11193 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11194 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11195 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11196 Op.getOperand(2), Op.getOperand(3));
11197 }
11198 case Intrinsic::ppc_convert_f128_to_ppcf128:
11199 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11200 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11201 ? RTLIB::CONVERT_PPCF128_F128
11202 : RTLIB::CONVERT_F128_PPCF128;
11203 MakeLibCallOptions CallOptions;
11204 std::pair<SDValue, SDValue> Result =
11205 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11206 dl, SDValue());
11207 return Result.first;
11208 }
11209 case Intrinsic::ppc_maxfe:
11210 case Intrinsic::ppc_maxfl:
11211 case Intrinsic::ppc_maxfs:
11212 case Intrinsic::ppc_minfe:
11213 case Intrinsic::ppc_minfl:
11214 case Intrinsic::ppc_minfs: {
11215 EVT VT = Op.getValueType();
11216 assert(
11217 all_of(Op->ops().drop_front(4),
11218 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11219 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11220 (void)VT;
11222 if (IntrinsicID == Intrinsic::ppc_minfe ||
11223 IntrinsicID == Intrinsic::ppc_minfl ||
11224 IntrinsicID == Intrinsic::ppc_minfs)
11225 CC = ISD::SETLT;
11226 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11227 SDValue Res = Op.getOperand(I);
11228 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11229 Res =
11230 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11231 }
11232 return Res;
11233 }
11234 }
11235
11236 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11237 // opcode number of the comparison.
11238 int CompareOpc;
11239 bool isDot;
11240 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11241 return SDValue(); // Don't custom lower most intrinsics.
11242
11243 // If this is a non-dot comparison, make the VCMP node and we are done.
11244 if (!isDot) {
11245 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11246 Op.getOperand(1), Op.getOperand(2),
11247 DAG.getConstant(CompareOpc, dl, MVT::i32));
11248 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11249 }
11250
11251 // Create the PPCISD altivec 'dot' comparison node.
11252 SDValue Ops[] = {
11253 Op.getOperand(2), // LHS
11254 Op.getOperand(3), // RHS
11255 DAG.getConstant(CompareOpc, dl, MVT::i32)
11256 };
11257 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11258 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11259
11260 // Now that we have the comparison, emit a copy from the CR to a GPR.
11261 // This is flagged to the above dot comparison.
11262 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11263 DAG.getRegister(PPC::CR6, MVT::i32),
11264 CompNode.getValue(1));
11265
11266 // Unpack the result based on how the target uses it.
11267 unsigned BitNo; // Bit # of CR6.
11268 bool InvertBit; // Invert result?
11269 switch (Op.getConstantOperandVal(1)) {
11270 default: // Can't happen, don't crash on invalid number though.
11271 case 0: // Return the value of the EQ bit of CR6.
11272 BitNo = 0; InvertBit = false;
11273 break;
11274 case 1: // Return the inverted value of the EQ bit of CR6.
11275 BitNo = 0; InvertBit = true;
11276 break;
11277 case 2: // Return the value of the LT bit of CR6.
11278 BitNo = 2; InvertBit = false;
11279 break;
11280 case 3: // Return the inverted value of the LT bit of CR6.
11281 BitNo = 2; InvertBit = true;
11282 break;
11283 }
11284
11285 // Shift the bit into the low position.
11286 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11287 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11288 // Isolate the bit.
11289 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11290 DAG.getConstant(1, dl, MVT::i32));
11291
11292 // If we are supposed to, toggle the bit.
11293 if (InvertBit)
11294 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11295 DAG.getConstant(1, dl, MVT::i32));
11296 return Flags;
11297}
11298
11299SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11300 SelectionDAG &DAG) const {
11301 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11302 // the beginning of the argument list.
11303 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11304 SDLoc DL(Op);
11305 switch (Op.getConstantOperandVal(ArgStart)) {
11306 case Intrinsic::ppc_cfence: {
11307 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11308 SDValue Val = Op.getOperand(ArgStart + 1);
11309 EVT Ty = Val.getValueType();
11310 if (Ty == MVT::i128) {
11311 // FIXME: Testing one of two paired registers is sufficient to guarantee
11312 // ordering?
11313 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11314 }
11315 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11316 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11317 return SDValue(
11318 DAG.getMachineNode(Opcode, DL, MVT::Other,
11319 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11320 Op.getOperand(0)),
11321 0);
11322 }
11323 default:
11324 break;
11325 }
11326 return SDValue();
11327}
11328
11329// Lower scalar BSWAP64 to xxbrd.
11330SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11331 SDLoc dl(Op);
11332 if (!Subtarget.isPPC64())
11333 return Op;
11334 // MTVSRDD
11335 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11336 Op.getOperand(0));
11337 // XXBRD
11338 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11339 // MFVSRD
11340 int VectorIndex = 0;
11341 if (Subtarget.isLittleEndian())
11342 VectorIndex = 1;
11343 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11344 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11345 return Op;
11346}
11347
11348// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11349// compared to a value that is atomically loaded (atomic loads zero-extend).
11350SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11351                                                SelectionDAG &DAG) const {
11352  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11353         "Expecting an atomic compare-and-swap here.");
11354  SDLoc dl(Op);
11355  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11356  EVT MemVT = AtomicNode->getMemoryVT();
  // Word and doubleword CAS need no adjustment; only the partword (i8/i16)
  // forms are rewritten below.
11357  if (MemVT.getSizeInBits() >= 32)
11358    return Op;
11359
  // Operand 2 of ATOMIC_CMP_SWAP is the comparison value.
11360  SDValue CmpOp = Op.getOperand(2);
11361  // If this is already correctly zero-extended, leave it alone.
11362  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11363  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11364    return Op;
11365
11366  // Clear the high bits of the compare operand.
11367  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11368  SDValue NewCmpOp =
11369      DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11370                  DAG.getConstant(MaskVal, dl, MVT::i32));
11371
11372  // Replace the existing compare operand with the properly zero-extended one.
  // NOTE(review): line 11373 is missing from this listing (presumably the
  // declaration of the `Ops` operand vector used below) — verify upstream.
11374  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11375    Ops.push_back(AtomicNode->getOperand(i));
11376  Ops[2] = NewCmpOp;
11377  MachineMemOperand *MMO = AtomicNode->getMemOperand();
11378  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  // Emit the width-specific PPC CAS node carrying the original memory operand.
11379  auto NodeTy =
11380      (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11381  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11382}
11383
// Lower quadword (i128) atomic load/store by splitting the value into two
// i64 halves and forwarding to the ppc_atomic_{load,store}_i128 intrinsics,
// which the instruction selector pattern-matches.
11384SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11385                                                  SelectionDAG &DAG) const {
11386  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11387  EVT MemVT = N->getMemoryVT();
11388  assert(MemVT.getSimpleVT() == MVT::i128 &&
11389         "Expect quadword atomic operations");
11390  SDLoc dl(N);
11391  unsigned Opc = N->getOpcode();
11392  switch (Opc) {
11393  case ISD::ATOMIC_LOAD: {
11394    // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11395    // lowered to ppc instructions by pattern matching instruction selector.
11396    SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
    // NOTE(review): line 11397 is missing from this listing (presumably the
    // declaration/initialization of `Ops`) — verify against upstream.
11398        N->getOperand(0),
11399        DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11400    for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11401      Ops.push_back(N->getOperand(I));
11402    SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11403                                                Ops, MemVT, N->getMemOperand());
    // Recombine the two i64 results into a single i128: lo | (hi << 64).
11404    SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11405    SDValue ValHi =
11406        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11407    ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11408                        DAG.getConstant(64, dl, MVT::i32));
11409    SDValue Val =
11410        DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11411    return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11412                       {Val, LoadedVal.getValue(2)});
11413  }
11414  case ISD::ATOMIC_STORE: {
11415    // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11416    // lowered to ppc instructions by pattern matching instruction selector.
11417    SDVTList Tys = DAG.getVTList(MVT::Other);
    // NOTE(review): line 11418 is missing from this listing (presumably the
    // declaration/initialization of `Ops`) — verify against upstream.
11419        N->getOperand(0),
11420        DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11421    SDValue Val = N->getOperand(1);
    // Split the i128 store value into low/high i64 halves.
11422    SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11423    SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11424                                DAG.getConstant(64, dl, MVT::i32));
11425    ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11426    Ops.push_back(ValLo);
11427    Ops.push_back(ValHi);
11428    Ops.push_back(N->getOperand(2));
11429    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11430                                   N->getMemOperand());
11431  }
11432  default:
11433    llvm_unreachable("Unexpected atomic opcode");
11434  }
11435}
11436
// NOTE(review): the signature line (11437) is missing from this listing; the
// visible parameters continue below. This helper lowers an llvm.is.fpclass
// style test against `Mask` using the VSX test-data-class instructions.
11438                                 SelectionDAG &DAG,
11439                                 const PPCSubtarget &Subtarget) {
11440  assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11441
  // Bit layout of the DCMX immediate consumed by xststdc[sp|dp|qp].
11442  enum DataClassMask {
11443    DC_NAN = 1 << 6,
11444    DC_NEG_INF = 1 << 4,
11445    DC_POS_INF = 1 << 5,
11446    DC_NEG_ZERO = 1 << 2,
11447    DC_POS_ZERO = 1 << 3,
11448    DC_NEG_SUBNORM = 1,
11449    DC_POS_SUBNORM = 1 << 1,
11450  };
11451
11452  EVT VT = Op.getValueType();
11453
  // Pick the test-data-class opcode matching the operand width.
11454  unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11455                    : VT == MVT::f64 ? PPC::XSTSTDCDP
11456                                     : PPC::XSTSTDCSP;
11457
  // Trivial masks fold to constants.
11458  if (Mask == fcAllFlags)
11459    return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11460  if (Mask == 0)
11461    return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11462
11463  // When it's cheaper or necessary to test reverse flags.
11464  if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11465    SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11466    return DAG.getNOT(Dl, Rev, MVT::i1);
11467  }
11468
11469  // Power doesn't support testing whether a value is 'normal'. Test the rest
11470  // first, and test if it's 'not not-normal' with expected sign.
11471  if (Mask & fcNormal) {
11472    SDValue Rev(DAG.getMachineNode(
11473        TestOp, Dl, MVT::i32,
11474        DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11475                                  DC_NEG_ZERO | DC_POS_ZERO |
11476                                  DC_NEG_SUBNORM | DC_POS_SUBNORM,
11477                              Dl, MVT::i32),
11478        Op),
11479                0);
11480    // Sign are stored in CR bit 0, result are in CR bit 2.
11481    SDValue Sign(
11482        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11483                           DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11484        0);
11485    SDValue Normal(DAG.getNOT(
11486        Dl,
        // NOTE(review): line 11487 is missing from this listing (presumably
        // the start of an SDValue(DAG.getMachineNode( expression) — verify
        // against upstream.
11488            TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11489            DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11490            0),
11491        MVT::i1));
11492    if (Mask & fcPosNormal)
11493      Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11494    SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11495    if (Mask == fcPosNormal || Mask == fcNegNormal)
11496      return Result;
11497
    // Fold in whatever non-normal classes remain in the mask.
11498    return DAG.getNode(
11499        ISD::OR, Dl, MVT::i1,
11500        getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11501  }
11502
11503  // The instruction doesn't differentiate between signaling or quiet NaN. Test
11504  // the rest first, and test if it 'is NaN and is signaling/quiet'.
11505  if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11506    bool IsQuiet = Mask & fcQNan;
11507    SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11508
11509    // Quietness is determined by the first bit in fraction field.
11510    uint64_t QuietMask = 0;
11511    SDValue HighWord;
11512    if (VT == MVT::f128) {
11513      HighWord = DAG.getNode(
11514          ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11515          DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11516      QuietMask = 0x8000;
11517    } else if (VT == MVT::f64) {
11518      if (Subtarget.isPPC64()) {
11519        HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11520                               DAG.getBitcast(MVT::i64, Op),
11521                               DAG.getConstant(1, Dl, MVT::i32));
11522      } else {
        // 32-bit target: go through a vector to reach the high word.
11523        SDValue Vec = DAG.getBitcast(
11524            MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11525        HighWord = DAG.getNode(
11526            ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11527            DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11528      }
11529      QuietMask = 0x80000;
11530    } else if (VT == MVT::f32) {
11531      HighWord = DAG.getBitcast(MVT::i32, Op);
11532      QuietMask = 0x400000;
11533    }
11534    SDValue NanRes = DAG.getSetCC(
11535        Dl, MVT::i1,
11536        DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11537                    DAG.getConstant(QuietMask, Dl, MVT::i32)),
11538        DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11539    NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11540    if (Mask == fcQNan || Mask == fcSNan)
11541      return NanRes;
11542
11543    return DAG.getNode(ISD::OR, Dl, MVT::i1,
11544                       getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11545                       NanRes);
11546  }
11547
  // Remaining classes map 1:1 onto DCMX bits; build the native immediate.
11548  unsigned NativeMask = 0;
11549  if ((Mask & fcNan) == fcNan)
11550    NativeMask |= DC_NAN;
11551  if (Mask & fcNegInf)
11552    NativeMask |= DC_NEG_INF;
11553  if (Mask & fcPosInf)
11554    NativeMask |= DC_POS_INF;
11555  if (Mask & fcNegZero)
11556    NativeMask |= DC_NEG_ZERO;
11557  if (Mask & fcPosZero)
11558    NativeMask |= DC_POS_ZERO;
11559  if (Mask & fcNegSubnormal)
11560    NativeMask |= DC_NEG_SUBNORM;
11561  if (Mask & fcPosSubnormal)
11562    NativeMask |= DC_POS_SUBNORM;
11563  return SDValue(
11564      DAG.getMachineNode(
11565          TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
          // NOTE(review): line 11566 is missing from this listing (presumably
          // the start of an SDValue(DAG.getMachineNode( expression) — verify
          // against upstream.
11567              TestOp, Dl, MVT::i32,
11568              DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11569              0),
11570          DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11571      0);
11572}
11573
11574SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11575 SelectionDAG &DAG) const {
11576 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11577 SDValue LHS = Op.getOperand(0);
11578 uint64_t RHSC = Op.getConstantOperandVal(1);
11579 SDLoc Dl(Op);
11580 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11581 if (LHS.getValueType() == MVT::ppcf128) {
11582 // The higher part determines the value class.
11583 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11584 DAG.getConstant(1, Dl, MVT::i32));
11585 }
11586
11587 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11588}
11589
// Lower SCALAR_TO_VECTOR either by splatting a reusable i32 load (LFIWAX/VSX
// path) or by bouncing the scalar through a 16-byte-aligned stack slot.
11590SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11591                                                 SelectionDAG &DAG) const {
11592  SDLoc dl(Op);
11593
  // NOTE(review): line 11594 is missing from this listing (presumably the
  // MachineFunction reference `MF` used below) — verify against upstream.
11595  SDValue Op0 = Op.getOperand(0);
11596  ReuseLoadInfo RLI;
  // If the scalar is itself a single-use i32 load whose address we can
  // reuse, emit a load-and-splat instead of going through memory again.
11597  if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11598      Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11599      Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11600      canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11601
11602    MachineMemOperand *MMO =
    // NOTE(review): line 11603 is missing from this listing (presumably the
    // MF.getMachineMemOperand(...) call head) — verify against upstream.
11604            RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11605    SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
    // NOTE(review): line 11606 is missing from this listing (presumably
    // `SDValue Bits = DAG.getMemIntrinsicNode(`) — verify against upstream.
11607        PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11608        MVT::i32, MMO);
11609    spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
11610    return Bits.getValue(0);
11611  }
11612
11613  // Create a stack slot that is 16-byte aligned.
11614  MachineFrameInfo &MFI = MF.getFrameInfo();
11615  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11616  EVT PtrVT = getPointerTy(DAG.getDataLayout());
11617  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11618
11619  SDValue Val = Op0;
11620  EVT ValVT = Val.getValueType();
11621  // P10 hardware store forwarding requires that a single store contains all
11622  // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11623  // to avoid load hit store on P10 when running binaries compiled for older
11624  // processors by generating two mergeable scalar stores to forward with the
11625  // vector load.
11626  if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11627      !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11628      ValVT.getSizeInBits() <= 64) {
11629    Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11630    EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11631    SDValue ShiftBy = DAG.getConstant(
11632        64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11633    Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11634    SDValue Plus8 =
11635        DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11636    SDValue Store2 =
11637        DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11638    SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11639    return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
    // NOTE(review): line 11640 is missing from this listing (presumably the
    // trailing `MachinePointerInfo());` argument) — verify against upstream.
11641  }
11642
11643  // Store the input value into Value#0 of the stack slot.
11644  SDValue Store =
11645      DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11646  // Load it out.
11647  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11648}
11649
11650SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11651 SelectionDAG &DAG) const {
11652 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11653 "Should only be called for ISD::INSERT_VECTOR_ELT");
11654
11655 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11656
11657 EVT VT = Op.getValueType();
11658 SDLoc dl(Op);
11659 SDValue V1 = Op.getOperand(0);
11660 SDValue V2 = Op.getOperand(1);
11661
11662 if (VT == MVT::v2f64 && C)
11663 return Op;
11664
11665 if (Subtarget.hasP9Vector()) {
11666 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11667 // because on P10, it allows this specific insert_vector_elt load pattern to
11668 // utilize the refactored load and store infrastructure in order to exploit
11669 // prefixed loads.
11670 // On targets with inexpensive direct moves (Power9 and up), a
11671 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11672 // load since a single precision load will involve conversion to double
11673 // precision on the load followed by another conversion to single precision.
11674 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11675 (isa<LoadSDNode>(V2))) {
11676 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11677 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11678 SDValue InsVecElt =
11679 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11680 BitcastLoad, Op.getOperand(2));
11681 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11682 }
11683 }
11684
11685 if (Subtarget.isISA3_1()) {
11686 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11687 return SDValue();
11688 // On P10, we have legal lowering for constant and variable indices for
11689 // all vectors.
11690 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11691 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11692 return Op;
11693 }
11694
11695 // Before P10, we have legal lowering for constant indices but not for
11696 // variable ones.
11697 if (!C)
11698 return SDValue();
11699
11700 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11701 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11702 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11703 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11704 unsigned InsertAtElement = C->getZExtValue();
11705 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11706 if (Subtarget.isLittleEndian()) {
11707 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11708 }
11709 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11710 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11711 }
11712 return Op;
11713}
11714
// Lower loads of MMA pair (v256i1) and accumulator (v512i1) types as 2 or 4
// consecutive v16i8 loads rebuilt into the wide register with
// PAIR_BUILD/ACC_BUILD.
11715SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11716                                           SelectionDAG &DAG) const {
11717  SDLoc dl(Op);
11718  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11719  SDValue LoadChain = LN->getChain();
11720  SDValue BasePtr = LN->getBasePtr();
11721  EVT VT = Op.getValueType();
11722
11723  if (VT != MVT::v256i1 && VT != MVT::v512i1)
11724    return Op;
11725
11726  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11727  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11728  // 2 or 4 vsx registers.
11729  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11730         "Type unsupported without MMA");
11731  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11732         "Type unsupported without paired vector support");
11733  Align Alignment = LN->getAlign();
  // NOTE(review): line 11734 is missing from this listing (presumably the
  // declaration of the `Loads` vector used below) — verify against upstream.
11735  SmallVector<SDValue, 4> LoadChains;
11736  unsigned NumVecs = VT.getSizeInBits() / 128;
11737  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11738    SDValue Load =
11739        DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11740                    LN->getPointerInfo().getWithOffset(Idx * 16),
11741                    commonAlignment(Alignment, Idx * 16),
11742                    LN->getMemOperand()->getFlags(), LN->getAAInfo());
11743    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11744                          DAG.getConstant(16, dl, BasePtr.getValueType()));
11745    Loads.push_back(Load);
11746    LoadChains.push_back(Load.getValue(1));
11747  }
  // Register order within the pair/accumulator is reversed on little endian.
11748  if (Subtarget.isLittleEndian()) {
11749    std::reverse(Loads.begin(), Loads.end());
11750    std::reverse(LoadChains.begin(), LoadChains.end());
11751  }
11752  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11753  SDValue Value =
11754      DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11755                  dl, VT, Loads);
11756  SDValue RetOps[] = {Value, TF};
11757  return DAG.getMergeValues(RetOps, dl);
11758}
11759
// Lower stores of MMA pair (v256i1) and accumulator (v512i1) types by
// extracting the underlying VSX registers and emitting 2 or 4 v16i8 stores.
11760SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11761                                            SelectionDAG &DAG) const {
11762  SDLoc dl(Op);
11763  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11764  SDValue StoreChain = SN->getChain();
11765  SDValue BasePtr = SN->getBasePtr();
11766  SDValue Value = SN->getValue();
11767  SDValue Value2 = SN->getValue();
11768  EVT StoreVT = Value.getValueType();
11769
11770  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11771    return Op;
11772
11773  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11774  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11775  // underlying registers individually.
11776  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11777         "Type unsupported without MMA");
11778  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11779         "Type unsupported without paired vector support");
11780  Align Alignment = SN->getAlign();
  // NOTE(review): line 11781 is missing from this listing (presumably the
  // declaration of the `Stores` vector used below) — verify against upstream.
11782  unsigned NumVecs = 2;
11783  if (StoreVT == MVT::v512i1) {
    // On ISA-Future the accumulator is first split into two v256i1 halves.
11784    if (Subtarget.isISAFuture()) {
11785      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11786      MachineSDNode *ExtNode = DAG.getMachineNode(
11787          PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11788
11789      Value = SDValue(ExtNode, 0);
11790      Value2 = SDValue(ExtNode, 1);
11791    } else
11792      Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11793    NumVecs = 4;
11794  }
11795  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Register order within the pair/accumulator is reversed on little endian.
11796    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11797    SDValue Elt;
11798    if (Subtarget.isISAFuture()) {
11799      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11800      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11801                        Idx > 1 ? Value2 : Value,
11802                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11803    } else
11804      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11805                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11806
11807    SDValue Store =
11808        DAG.getStore(StoreChain, dl, Elt, BasePtr,
11809                     SN->getPointerInfo().getWithOffset(Idx * 16),
11810                     commonAlignment(Alignment, Idx * 16),
11811                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
11812    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11813                          DAG.getConstant(16, dl, BasePtr.getValueType()));
11814    Stores.push_back(Store);
11815  }
11816  SDValue TF = DAG.getTokenFactor(dl, Stores);
11817  return TF;
11818}
11819
11820SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11821 SDLoc dl(Op);
11822 if (Op.getValueType() == MVT::v4i32) {
11823 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11824
11825 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11826 // +16 as shift amt.
11827 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11828 SDValue RHSSwap = // = vrlw RHS, 16
11829 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11830
11831 // Shrinkify inputs to v8i16.
11832 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11833 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11834 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11835
11836 // Low parts multiplied together, generating 32-bit results (we ignore the
11837 // top parts).
11838 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11839 LHS, RHS, DAG, dl, MVT::v4i32);
11840
11841 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11842 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11843 // Shift the high parts up 16 bits.
11844 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11845 Neg16, DAG, dl);
11846 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11847 } else if (Op.getValueType() == MVT::v16i8) {
11848 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11849 bool isLittleEndian = Subtarget.isLittleEndian();
11850
11851 // Multiply the even 8-bit parts, producing 16-bit sums.
11852 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11853 LHS, RHS, DAG, dl, MVT::v8i16);
11854 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11855
11856 // Multiply the odd 8-bit parts, producing 16-bit sums.
11857 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11858 LHS, RHS, DAG, dl, MVT::v8i16);
11859 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11860
11861 // Merge the results together. Because vmuleub and vmuloub are
11862 // instructions with a big-endian bias, we must reverse the
11863 // element numbering and reverse the meaning of "odd" and "even"
11864 // when generating little endian code.
11865 int Ops[16];
11866 for (unsigned i = 0; i != 8; ++i) {
11867 if (isLittleEndian) {
11868 Ops[i*2 ] = 2*i;
11869 Ops[i*2+1] = 2*i+16;
11870 } else {
11871 Ops[i*2 ] = 2*i+1;
11872 Ops[i*2+1] = 2*i+1+16;
11873 }
11874 }
11875 if (isLittleEndian)
11876 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11877 else
11878 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11879 } else {
11880 llvm_unreachable("Unknown mul to lower!");
11881 }
11882}
11883
11884SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11885 bool IsStrict = Op->isStrictFPOpcode();
11886 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11887 !Subtarget.hasP9Vector())
11888 return SDValue();
11889
11890 return Op;
11891}
11892
11893// Custom lowering for fpext vf32 to v2f64
11894SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11895
11896  assert(Op.getOpcode() == ISD::FP_EXTEND &&
11897         "Should only be called for ISD::FP_EXTEND");
11898
11899  // FIXME: handle extends from half precision float vectors on P9.
11900  // We only want to custom lower an extend from v2f32 to v2f64.
11901  if (Op.getValueType() != MVT::v2f64 ||
11902      Op.getOperand(0).getValueType() != MVT::v2f32)
11903    return SDValue();
11904
11905  SDLoc dl(Op);
11906  SDValue Op0 = Op.getOperand(0);
11907
11908  switch (Op0.getOpcode()) {
11909  default:
11910    return SDValue();
  // NOTE(review): line 11911 is missing from this listing (presumably the
  // case label opening this first block) — verify against upstream.
11912    assert(Op0.getNumOperands() == 2 &&
    // NOTE(review): line 11913 is missing from this listing (presumably a
    // second assert condition) — verify against upstream.
11914           "Node should have 2 operands with second one being a constant!");
11915
11916    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11917      return SDValue();
11918
11919    // Custom lower is only done for high or low doubleword.
11920    int Idx = Op0.getConstantOperandVal(1);
11921    if (Idx % 2 != 0)
11922      return SDValue();
11923
11924    // Since input is v4f32, at this point Idx is either 0 or 2.
11925    // Shift to get the doubleword position we want.
11926    int DWord = Idx >> 1;
11927
11928    // High and low word positions are different on little endian.
11929    if (Subtarget.isLittleEndian())
11930      DWord ^= 0x1;
11931
11932    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11933                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11934  }
11935  case ISD::FADD:
11936  case ISD::FMUL:
11937  case ISD::FSUB: {
11938    SDValue NewLoad[2];
11939    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11940      // Ensure both input are loads.
11941      SDValue LdOp = Op0.getOperand(i);
11942      if (LdOp.getOpcode() != ISD::LOAD)
11943        return SDValue();
11944      // Generate new load node.
      // NOTE(review): line 11945 is missing from this listing (presumably the
      // LoadSDNode cast `LD` used below) — verify against upstream.
11946      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11947      NewLoad[i] = DAG.getMemIntrinsicNode(
11948          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11949          LD->getMemoryVT(), LD->getMemOperand());
11950    }
11951    SDValue NewOp =
11952        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11953                    NewLoad[1], Op0.getNode()->getFlags());
11954    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11955                       DAG.getConstant(0, dl, MVT::i32));
11956  }
11957  case ISD::LOAD: {
    // NOTE(review): line 11958 is missing from this listing (presumably the
    // LoadSDNode cast `LD` used below) — verify against upstream.
11959    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11960    SDValue NewLd = DAG.getMemIntrinsicNode(
11961        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11962        LD->getMemoryVT(), LD->getMemOperand());
11963    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11964                       DAG.getConstant(0, dl, MVT::i32));
11965  }
11966  }
  // NOTE(review): "swtich" typo in the message below — it is a runtime
  // string, left byte-identical here.
11967  llvm_unreachable("ERROR:Should return for all cases within swtich.");
11968}
11969
11970/// LowerOperation - Provide custom lowering hooks for some operations.
11971///
// NOTE(review): line 11972 is missing from this listing (the function
// signature, presumably `SDValue PPCTargetLowering::LowerOperation(...)`) —
// verify against upstream.
11973  switch (Op.getOpcode()) {
11974  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11975  case ISD::FPOW:               return lowerPow(Op, DAG);
11976  case ISD::FSIN:               return lowerSin(Op, DAG);
11977  case ISD::FCOS:               return lowerCos(Op, DAG);
11978  case ISD::FLOG:               return lowerLog(Op, DAG);
11979  case ISD::FLOG10:             return lowerLog10(Op, DAG);
11980  case ISD::FEXP:               return lowerExp(Op, DAG);
11981  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
11982  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
11983  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
11984  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
11985  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
11986  case ISD::STRICT_FSETCC:
  // NOTE(review): line 11987 is missing from this listing (presumably
  // another case label falling through to LowerSETCC) — verify upstream.
11988  case ISD::SETCC:              return LowerSETCC(Op, DAG);
11989  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
11990  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
11991
11992  case ISD::INLINEASM:
11993  case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
11994  // Variable argument lowering.
11995  case ISD::VASTART:            return LowerVASTART(Op, DAG);
11996  case ISD::VAARG:              return LowerVAARG(Op, DAG);
11997  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
11998
11999  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
12000  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12001  case ISD::GET_DYNAMIC_AREA_OFFSET:
12002    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12003
12004  // Exception handling lowering.
12005  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
12006  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
12007  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
12008
12009  case ISD::LOAD:               return LowerLOAD(Op, DAG);
12010  case ISD::STORE:              return LowerSTORE(Op, DAG);
12011  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
12012  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  // NOTE(review): lines 12013-12014 are missing from this listing
  // (presumably strict FP-to-int case labels) — verify against upstream.
12015  case ISD::FP_TO_UINT:
12016  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  // NOTE(review): lines 12017-12018 are missing from this listing
  // (presumably strict int-to-FP case labels) — verify against upstream.
12019  case ISD::UINT_TO_FP:
12020  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
12021  case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
12022  case ISD::SET_ROUNDING:
12023    return LowerSET_ROUNDING(Op, DAG);
12024
12025  // Lower 64-bit shifts.
12026  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
12027  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
12028  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
12029
12030  case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
12031  case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
12032
12033  // Vector-related lowering.
12034  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
12035  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
12036  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12037  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
12038  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
12039  case ISD::MUL:                return LowerMUL(Op, DAG);
12040  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
  // NOTE(review): line 12041 is missing from this listing (presumably a
  // strict FP_ROUND case label) — verify against upstream.
12042  case ISD::FP_ROUND:
12043    return LowerFP_ROUND(Op, DAG);
12044  case ISD::ROTL:               return LowerROTL(Op, DAG);
12045
12046  // For counter-based loop handling.
12047  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
12048
12049  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
12050
12051  // Frame & Return address.
12052  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
12053  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
12054
  // NOTE(review): line 12055 is missing from this listing (presumably
  // `case ISD::INTRINSIC_VOID:`) — verify against upstream.
12056    return LowerINTRINSIC_VOID(Op, DAG);
12057  case ISD::BSWAP:
12058    return LowerBSWAP(Op, DAG);
12059  case ISD::ATOMIC_CMP_SWAP:
12060    return LowerATOMIC_CMP_SWAP(Op, DAG);
12061  case ISD::ATOMIC_STORE:
12062    return LowerATOMIC_LOAD_STORE(Op, DAG);
12063  case ISD::IS_FPCLASS:
12064    return LowerIS_FPCLASS(Op, DAG);
12065  }
12066}
12067
// NOTE(review): lines 12068-12069 are missing from this listing (the
// ReplaceNodeResults signature); the visible parameter continues below.
// This hook custom-legalizes result types for nodes the generic legalizer
// cannot handle, pushing replacement values into `Results`.
12070                                            SelectionDAG &DAG) const {
12071  SDLoc dl(N);
12072  switch (N->getOpcode()) {
12073  default:
12074    llvm_unreachable("Do not know how to custom type legalize this operation!");
12075  case ISD::ATOMIC_LOAD: {
12076    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12077    Results.push_back(Res);
12078    Results.push_back(Res.getValue(1));
12079    break;
12080  }
12081  case ISD::READCYCLECOUNTER: {
    // Combine the two 32-bit time-base halves into one i64 result.
12082    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12083    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12084
12085    Results.push_back(
12086        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12087    Results.push_back(RTB.getValue(2));
12088    break;
12089  }
  // NOTE(review): line 12090 is missing from this listing (presumably
  // `case ISD::INTRINSIC_W_CHAIN: {`) — verify against upstream.
12091    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12092      break;
12093
12094    assert(N->getValueType(0) == MVT::i1 &&
12095           "Unexpected result type for CTR decrement intrinsic");
12096    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12097                                 N->getValueType(0));
12098    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12099    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12100                                 N->getOperand(1));
12101
12102    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12103    Results.push_back(NewInt.getValue(1));
12104    break;
12105  }
  // NOTE(review): line 12106 is missing from this listing (presumably
  // `case ISD::INTRINSIC_WO_CHAIN: {`) — verify against upstream.
12107    switch (N->getConstantOperandVal(0)) {
12108    case Intrinsic::ppc_pack_longdouble:
12109      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12110                                    N->getOperand(2), N->getOperand(1)));
12111      break;
12112    case Intrinsic::ppc_maxfe:
12113    case Intrinsic::ppc_minfe:
12114    case Intrinsic::ppc_fnmsub:
12115    case Intrinsic::ppc_convert_f128_to_ppcf128:
12116      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12117      break;
12118    }
12119    break;
12120  }
12121  case ISD::VAARG: {
    // Only the 32-bit SVR4 ABI needs custom i64 VAARG legalization.
12122    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12123      return;
12124
12125    EVT VT = N->getValueType(0);
12126
12127    if (VT == MVT::i64) {
12128      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12129
12130      Results.push_back(NewNode);
12131      Results.push_back(NewNode.getValue(1));
12132    }
12133    return;
12134  }
  // NOTE(review): lines 12135-12136 are missing from this listing
  // (presumably strict FP-to-int case labels) — verify against upstream.
12137  case ISD::FP_TO_SINT:
12138  case ISD::FP_TO_UINT: {
12139    // LowerFP_TO_INT() can only handle f32 and f64.
12140    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12141        MVT::ppcf128)
12142      return;
12143    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12144    Results.push_back(LoweredValue);
12145    if (N->isStrictFPOpcode())
12146      Results.push_back(LoweredValue.getValue(1));
12147    return;
12148  }
12149  case ISD::TRUNCATE: {
12150    if (!N->getValueType(0).isVector())
12151      return;
12152    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12153    if (Lowered)
12154      Results.push_back(Lowered);
12155    return;
12156  }
12157  case ISD::SCALAR_TO_VECTOR: {
12158    SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12159    if (Lowered)
12160      Results.push_back(Lowered);
12161    return;
12162  }
12163  case ISD::FSHL:
12164  case ISD::FSHR:
12165    // Don't handle funnel shifts here.
12166    return;
12167  case ISD::BITCAST:
12168    // Don't handle bitcast here.
12169    return;
12170  case ISD::FP_EXTEND:
12171    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12172    if (Lowered)
12173      Results.push_back(Lowered);
12174    return;
12175  }
12176}
12177
12178//===----------------------------------------------------------------------===//
12179// Other Lowering Code
12180//===----------------------------------------------------------------------===//
12181
// Emit a zero-argument call to the given intrinsic at the builder's current
// insertion point, declaring it in the enclosing module if needed.
// NOTE(review): line 12182 is missing from this listing (the function
// signature) — verify against upstream.
12183  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
12184  Function *Func = Intrinsic::getDeclaration(M, Id);
12185  return Builder.CreateCall(Func, {});
12186}
12187
12188// The mappings for emitLeadingFence/emitTrailingFence are taken from
12189// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// Emits the barrier required *before* an atomic access with the given
// ordering; returns the emitted instruction, or nullptr if no fence is needed.
// NOTE(review): the first signature line and the condition guarding the
// ppc_sync case below are missing from this view; per the mapping table the
// guard is presumably `if (Ord == AtomicOrdering::SequentiallyConsistent)` —
// confirm against the full file.
12191 Instruction *Inst,
12192 AtomicOrdering Ord) const {
  // Sequentially-consistent accesses need a full `sync` before the access.
12194 return callIntrinsic(Builder, Intrinsic::ppc_sync);
  // Release (and stronger) orderings need only the lighter `lwsync`.
12195 if (isReleaseOrStronger(Ord))
12196 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // Weaker orderings require no leading fence on PPC.
12197 return nullptr;
12198}
12199
// Emits the barrier required *after* an atomic access with the given
// ordering; returns the emitted instruction, or nullptr if none is needed.
// NOTE(review): the first signature line and the callee line of the
// CreateCall below (presumably `Intrinsic::getDeclaration(...)`, original
// line 12209) are missing from this view — confirm against the full file.
12201 Instruction *Inst,
12202 AtomicOrdering Ord) const {
12203 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12204 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12205 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12206 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  // Plain atomic loads use the ppc_cfence intrinsic (ctrl-dep + isync idiom),
  // parameterized on the loaded type.
12207 if (isa<LoadInst>(Inst))
12208 return Builder.CreateCall(
12210 Builder.GetInsertBlock()->getParent()->getParent(),
12211 Intrinsic::ppc_cfence, {Inst->getType()}),
12212 {Inst});
12213 // FIXME: Can use isync for rmw operation.
12214 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12215 }
  // Non-acquire orderings (or no atomic load) need no trailing fence.
12216 return nullptr;
12217}
12218
// Expands a pseudo atomic read-modify-write (ATOMIC_SWAP when BinOpcode == 0,
// min/max when CmpOpcode != 0) into a load-reserve / store-conditional loop
// using the natural-width l[bhwd]arx / st[bhwd]cx. pair for AtomicSize bytes.
// Returns the block that control continues in (exitMBB).
// NOTE(review): the first signature line(s), the insertion-iterator
// declaration (original line 12255, presumably `MachineFunction::iterator
// It = ++BB->getIterator();`) and the successor-transfer call after the
// splice (original line 12273) are missing from this view — confirm against
// the full file.
12221 unsigned AtomicSize,
12222 unsigned BinOpcode,
12223 unsigned CmpOpcode,
12224 unsigned CmpPred) const {
12225 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12226 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12227
  // Pick the reservation load/store pair matching the access width.
12228 auto LoadMnemonic = PPC::LDARX;
12229 auto StoreMnemonic = PPC::STDCX;
12230 switch (AtomicSize) {
12231 default:
12232 llvm_unreachable("Unexpected size of atomic entity");
12233 case 1:
12234 LoadMnemonic = PPC::LBARX;
12235 StoreMnemonic = PPC::STBCX;
  // NOTE(review): the assert message looks inverted — partword atomics are
  // required precisely for sizes 1 and 2; it should likely say "size < 4".
12236 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12237 break;
12238 case 2:
12239 LoadMnemonic = PPC::LHARX;
12240 StoreMnemonic = PPC::STHCX;
12241 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12242 break;
12243 case 4:
12244 LoadMnemonic = PPC::LWARX;
12245 StoreMnemonic = PPC::STWCX;
12246 break;
12247 case 8:
12248 LoadMnemonic = PPC::LDARX;
12249 StoreMnemonic = PPC::STDCX;
12250 break;
12251 }
12252
12253 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12254 MachineFunction *F = BB->getParent();
12256
  // Pseudo operands: (0) result, (1)+(2) the two address registers of the
  // X-form memory operand, (3) the RHS/increment value.
12257 Register dest = MI.getOperand(0).getReg();
12258 Register ptrA = MI.getOperand(1).getReg();
12259 Register ptrB = MI.getOperand(2).getReg();
12260 DebugLoc dl = MI.getDebugLoc();
12261
  // loop2MBB only exists for the compare-and-branch (min/max) variants.
12262 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12263 MachineBasicBlock *loop2MBB =
12264 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12265 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12266 F->insert(It, loopMBB);
12267 if (CmpOpcode)
12268 F->insert(It, loop2MBB);
12269 F->insert(It, exitMBB);
  // Everything after MI moves to exitMBB; MI itself is replaced by the loop.
12270 exitMBB->splice(exitMBB->begin(), BB,
12271 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12274
12275 MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For plain swap (no bin op) the stored value is the increment itself.
12276 Register TmpReg = (!BinOpcode) ? incr :
12277 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12278 : &PPC::GPRCRegClass);
12279
12280 // thisMBB:
12281 // ...
12282 // fallthrough --> loopMBB
12283 BB->addSuccessor(loopMBB);
12284
12285 // loopMBB:
12286 // l[wd]arx dest, ptr
12287 // add r0, dest, incr
12288 // st[wd]cx. r0, ptr
12289 // bne- loopMBB
12290 // fallthrough --> exitMBB
12291
12292 // For max/min...
12293 // loopMBB:
12294 // l[wd]arx dest, ptr
12295 // cmpl?[wd] dest, incr
12296 // bgt exitMBB
12297 // loop2MBB:
12298 // st[wd]cx. dest, ptr
12299 // bne- loopMBB
12300 // fallthrough --> exitMBB
12301
12302 BB = loopMBB;
12303 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12304 .addReg(ptrA).addReg(ptrB);
12305 if (BinOpcode)
12306 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12307 if (CmpOpcode) {
12308 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12309 // Signed comparisons of byte or halfword values must be sign-extended.
12310 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12311 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12312 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12313 ExtReg).addReg(dest);
12314 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12315 } else
12316 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12317
  // If the comparison says "keep the old value", leave the loop early.
12318 BuildMI(BB, dl, TII->get(PPC::BCC))
12319 .addImm(CmpPred)
12320 .addReg(CrReg)
12321 .addMBB(exitMBB);
12322 BB->addSuccessor(loop2MBB);
12323 BB->addSuccessor(exitMBB);
12324 BB = loop2MBB;
12325 }
  // Conditional store; CR0 EQ bit tells whether the reservation held.
12326 BuildMI(BB, dl, TII->get(StoreMnemonic))
12327 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12328 BuildMI(BB, dl, TII->get(PPC::BCC))
12329 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12330 BB->addSuccessor(loopMBB);
12331 BB->addSuccessor(exitMBB);
12332
12333 // exitMBB:
12334 // ...
12335 BB = exitMBB;
12336 return BB;
12337}
12338
// Returns true when the defining instruction is known to produce a
// sign-extended value (sign-extending loads, EXTS*, arithmetic right shifts,
// or a COPY of an already sign-extended register).
// NOTE(review): this function's signature line is missing from this view
// (presumably `static bool isSignExtended(MachineInstr &MI, const
// PPCInstrInfo *TII)`) — confirm against the full file.
12340 switch(MI.getOpcode()) {
12341 default:
12342 return false;
  // A COPY is as sign-extended as its source; delegate to TII's analysis.
12343 case PPC::COPY:
12344 return TII->isSignExtended(MI.getOperand(1).getReg(),
12345 &MI.getMF()->getRegInfo());
12346 case PPC::LHA:
12347 case PPC::LHA8:
12348 case PPC::LHAU:
12349 case PPC::LHAU8:
12350 case PPC::LHAUX:
12351 case PPC::LHAUX8:
12352 case PPC::LHAX:
12353 case PPC::LHAX8:
12354 case PPC::LWA:
12355 case PPC::LWAUX:
12356 case PPC::LWAX:
12357 case PPC::LWAX_32:
12358 case PPC::LWA_32:
12359 case PPC::PLHA:
12360 case PPC::PLHA8:
12361 case PPC::PLHA8pc:
12362 case PPC::PLHApc:
12363 case PPC::PLWA:
12364 case PPC::PLWA8:
12365 case PPC::PLWA8pc:
12366 case PPC::PLWApc:
12367 case PPC::EXTSB:
12368 case PPC::EXTSB8:
12369 case PPC::EXTSB8_32_64:
12370 case PPC::EXTSB8_rec:
12371 case PPC::EXTSB_rec:
12372 case PPC::EXTSH:
12373 case PPC::EXTSH8:
12374 case PPC::EXTSH8_32_64:
12375 case PPC::EXTSH8_rec:
12376 case PPC::EXTSH_rec:
12377 case PPC::EXTSW:
12378 case PPC::EXTSWSLI:
12379 case PPC::EXTSWSLI_32_64:
12380 case PPC::EXTSWSLI_32_64_rec:
12381 case PPC::EXTSWSLI_rec:
12382 case PPC::EXTSW_32:
12383 case PPC::EXTSW_32_64:
12384 case PPC::EXTSW_32_64_rec:
12385 case PPC::EXTSW_rec:
12386 case PPC::SRAW:
12387 case PPC::SRAWI:
12388 case PPC::SRAWI_rec:
12389 case PPC::SRAW_rec:
12390 return true;
12391 }
  // Unreachable: every switch path above returns; kept to satisfy compilers
  // that do not see the switch as exhaustive.
12392 return false;
12393}
12394
// Expands an 8- or 16-bit pseudo atomic RMW on targets *without* partword
// lbarx/lharx support: performs a word-aligned lwarx/stwcx. loop and uses
// shift/mask bookkeeping to update only the addressed byte/halfword.
// Returns the block control continues in (exitMBB).
// NOTE(review): several lines are missing from this view — the first
// signature line(s), the insertion-iterator declaration (original line
// 12432), the successor-transfer call after the splice (line 12448), and the
// predicate immediate of the retry branch (line 12587, presumably
// `.addImm(PPC::PRED_NE)`) — confirm against the full file.
12397 bool is8bit, // operation
12398 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12399 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12400 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12401
12402 // If this is a signed comparison and the value being compared is not known
12403 // to be sign extended, sign extend it here.
12404 DebugLoc dl = MI.getDebugLoc();
12405 MachineFunction *F = BB->getParent();
12406 MachineRegisterInfo &RegInfo = F->getRegInfo();
12407 Register incr = MI.getOperand(3).getReg();
12408 bool IsSignExtended =
12409 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12410
12411 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12412 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12413 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12414 .addReg(MI.getOperand(3).getReg());
  // Rewrite MI's operand so the (possibly delegated) expansion sees the
  // sign-extended value.
12415 MI.getOperand(3).setReg(ValueReg);
12416 incr = ValueReg;
12417 }
12418 // If we support part-word atomic mnemonics, just use them
12419 if (Subtarget.hasPartwordAtomics())
12420 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12421 CmpPred);
12422
12423 // In 64 bit mode we have to use 64 bits for addresses, even though the
12424 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12425 // registers without caring whether they're 32 or 64, but here we're
12426 // doing actual arithmetic on the addresses.
12427 bool is64bit = Subtarget.isPPC64();
12428 bool isLittleEndian = Subtarget.isLittleEndian();
12429 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12430
12431 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12433
12434 Register dest = MI.getOperand(0).getReg();
12435 Register ptrA = MI.getOperand(1).getReg();
12436 Register ptrB = MI.getOperand(2).getReg();
12437
12438 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12439 MachineBasicBlock *loop2MBB =
12440 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12441 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12442 F->insert(It, loopMBB);
12443 if (CmpOpcode)
12444 F->insert(It, loop2MBB);
12445 F->insert(It, exitMBB);
12446 exitMBB->splice(exitMBB->begin(), BB,
12447 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12449
  // Address arithmetic must match pointer width; all data registers are
  // 32-bit GPRs since the memory access itself is a 32-bit lwarx/stwcx.
12450 const TargetRegisterClass *RC =
12451 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12452 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12453
12454 Register PtrReg = RegInfo.createVirtualRegister(RC);
12455 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little-endian the lane offset is the shift directly; big-endian needs
  // the extra XORI correction below, hence a second register.
12456 Register ShiftReg =
12457 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12458 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12459 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12460 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12461 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12462 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12463 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12464 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12465 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12466 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12467 Register Ptr1Reg;
12468 Register TmpReg =
12469 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12470
12471 // thisMBB:
12472 // ...
12473 // fallthrough --> loopMBB
12474 BB->addSuccessor(loopMBB);
12475
12476 // The 4-byte load must be aligned, while a char or short may be
12477 // anywhere in the word. Hence all this nasty bookkeeping code.
12478 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12479 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12480 // xori shift, shift1, 24 [16]
12481 // rlwinm ptr, ptr1, 0, 0, 29
12482 // slw incr2, incr, shift
12483 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12484 // slw mask, mask2, shift
12485 // loopMBB:
12486 // lwarx tmpDest, ptr
12487 // add tmp, tmpDest, incr2
12488 // andc tmp2, tmpDest, mask
12489 // and tmp3, tmp, mask
12490 // or tmp4, tmp3, tmp2
12491 // stwcx. tmp4, ptr
12492 // bne- loopMBB
12493 // fallthrough --> exitMBB
12494 // srw SrwDest, tmpDest, shift
12495 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
12496 if (ptrA != ZeroReg) {
12497 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12498 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12499 .addReg(ptrA)
12500 .addReg(ptrB);
12501 } else {
12502 Ptr1Reg = ptrB;
12503 }
12504 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12505 // mode.
12506 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12507 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12508 .addImm(3)
12509 .addImm(27)
12510 .addImm(is8bit ? 28 : 27)
12511 if (!isLittleEndian)
12512 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12513 .addReg(Shift1Reg)
12514 .addImm(is8bit ? 24 : 16);
12515 if (is64bit)
12516 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12517 .addReg(Ptr1Reg)
12518 .addImm(0)
12519 .addImm(61);
12520 else
12521 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12522 .addReg(Ptr1Reg)
12523 .addImm(0)
12524 .addImm(0)
12525 .addImm(29);
12526 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12527 if (is8bit)
12528 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12529 else {
  // LI sign-extends its 16-bit immediate, so 65535 must be built as 0|ORI.
12530 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12531 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12532 .addReg(Mask3Reg)
12533 .addImm(65535);
12534 }
12535 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12536 .addReg(Mask2Reg)
12537 .addReg(ShiftReg);
12538
12539 BB = loopMBB;
12540 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12541 .addReg(ZeroReg)
12542 .addReg(PtrReg);
12543 if (BinOpcode)
12544 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12545 .addReg(Incr2Reg)
12546 .addReg(TmpDestReg);
  // Merge: keep the untouched lanes (ANDC) and the updated lane (AND).
12547 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12548 .addReg(TmpDestReg)
12549 .addReg(MaskReg);
12550 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12551 if (CmpOpcode) {
12552 // For unsigned comparisons, we can directly compare the shifted values.
12553 // For signed comparisons we shift and sign extend.
12554 Register SReg = RegInfo.createVirtualRegister(GPRC);
12555 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12556 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12557 .addReg(TmpDestReg)
12558 .addReg(MaskReg);
12559 unsigned ValueReg = SReg;
12560 unsigned CmpReg = Incr2Reg;
12561 if (CmpOpcode == PPC::CMPW) {
12562 ValueReg = RegInfo.createVirtualRegister(GPRC);
12563 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12564 .addReg(SReg)
12565 .addReg(ShiftReg);
12566 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12567 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12568 .addReg(ValueReg);
12569 ValueReg = ValueSReg;
12570 CmpReg = incr;
12571 }
12572 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12573 BuildMI(BB, dl, TII->get(PPC::BCC))
12574 .addImm(CmpPred)
12575 .addReg(CrReg)
12576 .addMBB(exitMBB);
12577 BB->addSuccessor(loop2MBB);
12578 BB->addSuccessor(exitMBB);
12579 BB = loop2MBB;
12580 }
12581 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12582 BuildMI(BB, dl, TII->get(PPC::STWCX))
12583 .addReg(Tmp4Reg)
12584 .addReg(ZeroReg)
12585 .addReg(PtrReg);
  // Retry branch on reservation loss (predicate operand missing in this view).
12586 BuildMI(BB, dl, TII->get(PPC::BCC))
12588 .addReg(PPC::CR0)
12589 .addMBB(loopMBB);
12590 BB->addSuccessor(loopMBB);
12591 BB->addSuccessor(exitMBB);
12592
12593 // exitMBB:
12594 // ...
12595 BB = exitMBB;
12596 // Since the shift amount is not a constant, we need to clear
12597 // the upper bits with a separate RLWINM.
  // The two instructions are inserted at begin() in reverse, so the SRW
  // (inserted second) ends up executing before the RLWINM.
12598 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12599 .addReg(SrwDestReg)
12600 .addImm(0)
12601 .addImm(is8bit ? 24 : 16)
12602 .addImm(31);
12603 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12604 .addReg(TmpDestReg)
12605 .addReg(ShiftReg);
12606 return BB;
12607}
12608
// Custom-inserts the EH_SjLj_SetJmp32/64 pseudo: splits MBB into
// thisMBB / mainMBB / sinkMBB so that a direct call returns 0 (mainDstReg)
// and a longjmp resume returns 1 (restoreDstReg), joined by a PHI.
// NOTE(review): several lines are missing from this view — the signature's
// first line(s), the MachineRegisterInfo and iterator declarations (original
// lines 12617/12620), the CFG bookkeeping after the splice (12653/12658), and
// the MachineInstrBuilder declaration (12682) — confirm against the full file.
12611 MachineBasicBlock *MBB) const {
12612 DebugLoc DL = MI.getDebugLoc();
12613 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12614 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12615
12616 MachineFunction *MF = MBB->getParent();
12618
12619 const BasicBlock *BB = MBB->getBasicBlock();
12621
12622 Register DstReg = MI.getOperand(0).getReg();
12623 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12624 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12625 Register mainDstReg = MRI.createVirtualRegister(RC);
12626 Register restoreDstReg = MRI.createVirtualRegister(RC);
12627
12628 MVT PVT = getPointerTy(MF->getDataLayout());
12629 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12630 "Invalid Pointer Size!");
12631 // For v = setjmp(buf), we generate
12632 //
12633 // thisMBB:
12634 // SjLjSetup mainMBB
12635 // bl mainMBB
12636 // v_restore = 1
12637 // b sinkMBB
12638 //
12639 // mainMBB:
12640 // buf[LabelOffset] = LR
12641 // v_main = 0
12642 //
12643 // sinkMBB:
12644 // v = phi(main, restore)
12645 //
12646
12647 MachineBasicBlock *thisMBB = MBB;
12648 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12649 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12650 MF->insert(I, mainMBB);
12651 MF->insert(I, sinkMBB);
12652
12654
12655 // Transfer the remainder of BB and its successor edges to sinkMBB.
12656 sinkMBB->splice(sinkMBB->begin(), MBB,
12657 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12659
12660 // Note that the structure of the jmp_buf used here is not compatible
12661 // with that used by libc, and is not designed to be. Specifically, it
12662 // stores only those 'reserved' registers that LLVM does not otherwise
12663 // understand how to spill. Also, by convention, by the time this
12664 // intrinsic is called, Clang has already stored the frame address in the
12665 // first slot of the buffer and stack address in the third. Following the
12666 // X86 target code, we'll store the jump address in the second slot. We also
12667 // need to save the TOC pointer (R2) to handle jumps between shared
12668 // libraries, and that will be stored in the fourth slot. The thread
12669 // identifier (R13) is not affected.
12670
12671 // thisMBB:
12672 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12673 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12674 const int64_t BPOffset = 4 * PVT.getStoreSize();
12675
12676 // Prepare IP either in reg.
12677 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12678 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12679 Register BufReg = MI.getOperand(1).getReg();
12680
  // ELFv1/ELFv2 64-bit: save the TOC pointer so cross-library jumps work.
12681 if (Subtarget.is64BitELFABI()) {
12683 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12684 .addReg(PPC::X2)
12685 .addImm(TOCOffset)
12686 .addReg(BufReg)
12687 .cloneMemRefs(MI);
12688 }
12689
12690 // Naked functions never have a base pointer, and so we use r1. For all
12691 // other functions, this decision must be delayed until during PEI.
12692 unsigned BaseReg;
12693 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12694 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12695 else
12696 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12697
12698 MIB = BuildMI(*thisMBB, MI, DL,
12699 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12700 .addReg(BaseReg)
12701 .addImm(BPOffset)
12702 .addReg(BufReg)
12703 .cloneMemRefs(MI);
12704
12705 // Setup
12706 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  // The "call" to mainMBB preserves nothing; a longjmp may clobber anything.
12707 MIB.addRegMask(TRI->getNoPreservedMask());
12708
12709 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12710
12711 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12712 .addMBB(mainMBB);
12713 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12714
  // The mainMBB edge is the fallthrough of BCLalways; probability zero keeps
  // block placement treating the longjmp-resume path as cold.
12715 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12716 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12717
12718 // mainMBB:
12719 // mainDstReg = 0
12720 MIB =
12721 BuildMI(mainMBB, DL,
12722 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12723
12724 // Store IP
12725 if (Subtarget.isPPC64()) {
12726 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12727 .addReg(LabelReg)
12728 .addImm(LabelOffset)
12729 .addReg(BufReg);
12730 } else {
12731 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12732 .addReg(LabelReg)
12733 .addImm(LabelOffset)
12734 .addReg(BufReg);
12735 }
12736 MIB.cloneMemRefs(MI);
12737
12738 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12739 mainMBB->addSuccessor(sinkMBB);
12740
12741 // sinkMBB:
12742 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12743 TII->get(PPC::PHI), DstReg)
12744 .addReg(mainDstReg).addMBB(mainMBB)
12745 .addReg(restoreDstReg).addMBB(thisMBB);
12746
12747 MI.eraseFromParent();
12748 return sinkMBB;
12749}
12750
// Custom-inserts the EH_SjLj_LongJmp32/64 pseudo: reloads FP, the saved IP,
// SP, BP (and on 64-bit SVR4, the TOC pointer) from the jmp_buf, then jumps
// to the saved IP via the count register.
// NOTE(review): the signature's first line(s), the MachineRegisterInfo
// declaration (original line 12758), the MachineInstrBuilder declaration
// (12776) and one line inside the TOC-reload branch (12837) are missing from
// this view — confirm against the full file.
12753 MachineBasicBlock *MBB) const {
12754 DebugLoc DL = MI.getDebugLoc();
12755 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12756
12757 MachineFunction *MF = MBB->getParent();
12759
12760 MVT PVT = getPointerTy(MF->getDataLayout());
12761 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12762 "Invalid Pointer Size!");
12763
12764 const TargetRegisterClass *RC =
12765 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12766 Register Tmp = MRI.createVirtualRegister(RC);
12767 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12768 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12769 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12770 unsigned BP =
12771 (PVT == MVT::i64)
12772 ? PPC::X30
12773 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12774 : PPC::R30);
12775
12777
  // jmp_buf slot layout mirrors emitEHSjLjSetJmp: 0=FP, 1=IP, 2=SP, 3=TOC,
  // 4=BP (in pointer-size units).
12778 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12779 const int64_t SPOffset = 2 * PVT.getStoreSize();
12780 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12781 const int64_t BPOffset = 4 * PVT.getStoreSize();
12782
12783 Register BufReg = MI.getOperand(0).getReg();
12784
12785 // Reload FP (the jumped-to function may not have had a
12786 // frame pointer, and if so, then its r31 will be restored
12787 // as necessary).
12788 if (PVT == MVT::i64) {
12789 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12790 .addImm(0)
12791 .addReg(BufReg);
12792 } else {
12793 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12794 .addImm(0)
12795 .addReg(BufReg);
12796 }
12797 MIB.cloneMemRefs(MI);
12798
12799 // Reload IP
12800 if (PVT == MVT::i64) {
12801 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12802 .addImm(LabelOffset)
12803 .addReg(BufReg);
12804 } else {
12805 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12806 .addImm(LabelOffset)
12807 .addReg(BufReg);
12808 }
12809 MIB.cloneMemRefs(MI);
12810
12811 // Reload SP
12812 if (PVT == MVT::i64) {
12813 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12814 .addImm(SPOffset)
12815 .addReg(BufReg);
12816 } else {
12817 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12818 .addImm(SPOffset)
12819 .addReg(BufReg);
12820 }
12821 MIB.cloneMemRefs(MI);
12822
12823 // Reload BP
12824 if (PVT == MVT::i64) {
12825 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12826 .addImm(BPOffset)
12827 .addReg(BufReg);
12828 } else {
12829 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12830 .addImm(BPOffset)
12831 .addReg(BufReg);
12832 }
12833 MIB.cloneMemRefs(MI);
12834
12835 // Reload TOC
12836 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12838 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12839 .addImm(TOCOffset)
12840 .addReg(BufReg)
12841 .cloneMemRefs(MI);
12842 }
12843
12844 // Jump
12845 BuildMI(*MBB, MI, DL,
12846 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12847 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12848
12849 MI.eraseFromParent();
12850 return MBB;
12851}
12852
// Returns true when the function opted into inline stack probing via the
// "probe-stack" attribute set to "inline-asm"; any other value (or no
// attribute) means no inline probes.
// NOTE(review): the signature line is missing from this view (presumably
// `bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF)
// const {`) — confirm against the full file.
12854 // If the function specifically requests inline stack probes, emit them.
12855 if (MF.getFunction().hasFnAttribute("probe-stack"))
12856 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12857 "inline-asm";
12858 return false;
12859}
12860
// Returns the stack-probe interval in bytes: the "stack-probe-size"
// attribute (default 4096) rounded down to the stack alignment, with the
// alignment itself as a floor so the result is never zero.
// NOTE(review): the signature line is missing from this view (presumably
// `unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF)
// const {`) — confirm against the full file.
12862 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12863 unsigned StackAlign = TFI->getStackAlignment();
12864 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12865 "Unexpected stack alignment");
12866 // The default stack probe size is 4096 if the function has no
12867 // stack-probe-size attribute.
12868 const Function &Fn = MF.getFunction();
12869 unsigned StackProbeSize =
12870 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12871 // Round down to the stack alignment.
  // Valid because StackAlign is a power of two (asserted above).
12872 StackProbeSize &= ~(StackAlign - 1);
12873 return StackProbeSize ? StackProbeSize : StackAlign;
12874}
12875
12876// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12877// into three phases. In the first phase, it uses pseudo instruction
12878// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
12879// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12880// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
12881// MaxCallFrameSize so that it can calculate correct data area pointer.
// NOTE(review): the signature's first line(s), the MachineRegisterInfo and
// iterator declarations (original lines 12891/12916), the predicate operand
// of the test branch (13000, presumably `.addImm(PPC::PRED_EQ)`) and one
// CFG-bookkeeping line after the splice (13034) are missing from this view —
// confirm against the full file.
12884 MachineBasicBlock *MBB) const {
12885 const bool isPPC64 = Subtarget.isPPC64();
12886 MachineFunction *MF = MBB->getParent();
12887 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12888 DebugLoc DL = MI.getDebugLoc();
12889 const unsigned ProbeSize = getStackProbeSize(*MF);
12890 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12892 // The CFG of probing stack looks as
12893 // +-----+
12894 // | MBB |
12895 // +--+--+
12896 // |
12897 // +----v----+
12898 // +--->+ TestMBB +---+
12899 // | +----+----+ |
12900 // | | |
12901 // | +-----v----+ |
12902 // +---+ BlockMBB | |
12903 // +----------+ |
12904 // |
12905 // +---------+ |
12906 // | TailMBB +<--+
12907 // +---------+
12908 // In MBB, calculate previous frame pointer and final stack pointer.
12909 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12910 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12911 // TailMBB is spliced via \p MI.
12912 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12913 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12914 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12915
12917 MF->insert(MBBIter, TestMBB);
12918 MF->insert(MBBIter, BlockMBB);
12919 MF->insert(MBBIter, TailMBB);
12920
12921 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12922 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12923
12924 Register DstReg = MI.getOperand(0).getReg();
12925 Register NegSizeReg = MI.getOperand(1).getReg();
12926 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12927 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12928 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12929 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12930
12931 // Since value of NegSizeReg might be realigned in the prologue/epilogue
12932 // pass, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get actual
12933 // FramePointer and NegSize.
12934 unsigned ProbeOpc;
12935 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12936 ProbeOpc =
12937 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12938 else
12939 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12940 // and NegSizeReg will be allocated in the same phyreg to avoid
12941 // redundant copy when NegSizeReg has only one use which is current MI and
12942 // will be replaced by PREPARE_PROBED_ALLOCA then.
12943 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12944 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12945 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12946 .addDef(ActualNegSizeReg)
12947 .addReg(NegSizeReg)
12948 .add(MI.getOperand(2))
12949 .add(MI.getOperand(3));
12950
12951 // Calculate final stack pointer, which equals to SP + ActualNegSize.
12952 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12953 FinalStackPtr)
12954 .addReg(SPReg)
12955 .addReg(ActualNegSizeReg);
12956
12957 // Materialize a scratch register for update.
12958 int64_t NegProbeSize = -(int64_t)ProbeSize;
12959 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12960 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  // Large probe sizes need a two-instruction LIS+ORI materialization.
12961 if (!isInt<16>(NegProbeSize)) {
12962 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12963 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12964 .addImm(NegProbeSize >> 16);
12965 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12966 ScratchReg)
12967 .addReg(TempReg)
12968 .addImm(NegProbeSize & 0xFFFF);
12969 } else
12970 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12971 .addImm(NegProbeSize);
12972
12973 {
12974 // Probing leading residual part.
  // residual = ActualNegSize - (ActualNegSize / ProbeSize) * ProbeSize,
  // probed with a single store-with-update that also bumps SP.
12975 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12976 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12977 .addReg(ActualNegSizeReg)
12978 .addReg(ScratchReg);
12979 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12980 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12981 .addReg(Div)
12982 .addReg(ScratchReg);
12983 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12984 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12985 .addReg(Mul)
12986 .addReg(ActualNegSizeReg);
12987 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12988 .addReg(FramePointer)
12989 .addReg(SPReg)
12990 .addReg(NegMod);
12991 }
12992
12993 {
12994 // Remaining part should be multiple of ProbeSize.
12995 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12996 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12997 .addReg(SPReg)
12998 .addReg(FinalStackPtr);
  // Exit the loop once SP has reached the final stack pointer (predicate
  // operand missing in this view).
12999 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13001 .addReg(CmpResult)
13002 .addMBB(TailMBB);
13003 TestMBB->addSuccessor(BlockMBB);
13004 TestMBB->addSuccessor(TailMBB);
13005 }
13006
13007 {
13008 // Touch the block.
13009 // |P...|P...|P...
13010 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13011 .addReg(FramePointer)
13012 .addReg(SPReg)
13013 .addReg(ScratchReg)
13014 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13015 BlockMBB->addSuccessor(TestMBB);
13016 }
13017
13018 // Calculation of MaxCallFrameSize is deferred to the prologue/epilogue
13019 // pass; use the DYNAREAOFFSET pseudo instruction to get the future result.
13020 Register MaxCallFrameSizeReg =
13021 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13022 BuildMI(TailMBB, DL,
13023 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13024 MaxCallFrameSizeReg)
13025 .add(MI.getOperand(2))
13026 .add(MI.getOperand(3));
13027 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13028 .addReg(SPReg)
13029 .addReg(MaxCallFrameSizeReg);
13030
13031 // Splice instructions after MI to TailMBB.
13032 TailMBB->splice(TailMBB->end(), MBB,
13033 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13035 MBB->addSuccessor(TestMBB);
13036
13037 // Delete the pseudo instruction.
13038 MI.eraseFromParent();
13039
13040 ++NumDynamicAllocaProbed;
13041 return TailMBB;
13042}
13043
13045 switch (MI.getOpcode()) {
13046 case PPC::SELECT_CC_I4:
13047 case PPC::SELECT_CC_I8:
13048 case PPC::SELECT_CC_F4:
13049 case PPC::SELECT_CC_F8:
13050 case PPC::SELECT_CC_F16:
13051 case PPC::SELECT_CC_VRRC:
13052 case PPC::SELECT_CC_VSFRC:
13053 case PPC::SELECT_CC_VSSRC:
13054 case PPC::SELECT_CC_VSRC:
13055 case PPC::SELECT_CC_SPE4:
13056 case PPC::SELECT_CC_SPE:
13057 return true;
13058 default:
13059 return false;
13060 }
13061}
13062
13063static bool IsSelect(MachineInstr &MI) {
13064 switch (MI.getOpcode()) {
13065 case PPC::SELECT_I4:
13066 case PPC::SELECT_I8:
13067 case PPC::SELECT_F4:
13068 case PPC::SELECT_F8:
13069 case PPC::SELECT_F16:
13070 case PPC::SELECT_SPE:
13071 case PPC::SELECT_SPE4:
13072 case PPC::SELECT_VRRC:
13073 case PPC::SELECT_VSFRC:
13074 case PPC::SELECT_VSSRC:
13075 case PPC::SELECT_VSRC:
13076 return true;
13077 default:
13078 return false;
13079 }
13080}
13081
13084 MachineBasicBlock *BB) const {
13085 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13086 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13087 if (Subtarget.is64BitELFABI() &&
13088 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13089 !Subtarget.isUsingPCRelativeCalls()) {
13090 // Call lowering should have added an r2 operand to indicate a dependence
13091 // on the TOC base pointer value. It can't however, because there is no
13092 // way to mark the dependence as implicit there, and so the stackmap code
13093 // will confuse it with a regular operand. Instead, add the dependence
13094 // here.
13095 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13096 }
13097
13098 return emitPatchPoint(MI, BB);
13099 }
13100
13101 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13102 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13103 return emitEHSjLjSetJmp(MI, BB);
13104 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13105 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13106 return emitEHSjLjLongJmp(MI, BB);
13107 }
13108
13109 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13110
13111 // To "insert" these instructions we actually have to insert their
13112 // control-flow patterns.
13113 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13115
13116 MachineFunction *F = BB->getParent();
13117 MachineRegisterInfo &MRI = F->getRegInfo();
13118
13119 if (Subtarget.hasISEL() &&
13120 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13121 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13122 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13124 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13125 MI.getOpcode() == PPC::SELECT_CC_I8)
13126 Cond.push_back(MI.getOperand(4));
13127 else
13129 Cond.push_back(MI.getOperand(1));
13130
13131 DebugLoc dl = MI.getDebugLoc();
13132 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13133 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13134 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13135 // The incoming instruction knows the destination vreg to set, the
13136 // condition code register to branch on, the true/false values to
13137 // select between, and a branch opcode to use.
13138
13139 // thisMBB:
13140 // ...
13141 // TrueVal = ...
13142 // cmpTY ccX, r1, r2
13143 // bCC sinkMBB
13144 // fallthrough --> copy0MBB
13145 MachineBasicBlock *thisMBB = BB;
13146 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13147 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13148 DebugLoc dl = MI.getDebugLoc();
13149 F->insert(It, copy0MBB);
13150 F->insert(It, sinkMBB);
13151
13152 // Set the call frame size on entry to the new basic blocks.
13153 // See https://reviews.llvm.org/D156113.
13154 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13155 copy0MBB->setCallFrameSize(CallFrameSize);
13156 sinkMBB->setCallFrameSize(CallFrameSize);
13157
13158 // Transfer the remainder of BB and its successor edges to sinkMBB.
13159 sinkMBB->splice(sinkMBB->begin(), BB,
13160 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13162
13163 // Next, add the true and fallthrough blocks as its successors.
13164 BB->addSuccessor(copy0MBB);
13165 BB->addSuccessor(sinkMBB);
13166
13167 if (IsSelect(MI)) {
13168 BuildMI(BB, dl, TII->get(PPC::BC))
13169 .addReg(MI.getOperand(1).getReg())
13170 .addMBB(sinkMBB);
13171 } else {
13172 unsigned SelectPred = MI.getOperand(4).getImm();
13173 BuildMI(BB, dl, TII->get(PPC::BCC))
13174 .addImm(SelectPred)
13175 .addReg(MI.getOperand(1).getReg())
13176 .addMBB(sinkMBB);
13177 }
13178
13179 // copy0MBB:
13180 // %FalseValue = ...
13181 // # fallthrough to sinkMBB
13182 BB = copy0MBB;
13183
13184 // Update machine-CFG edges
13185 BB->addSuccessor(sinkMBB);
13186
13187 // sinkMBB:
13188 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13189 // ...
13190 BB = sinkMBB;
13191 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13192 .addReg(MI.getOperand(3).getReg())
13193 .addMBB(copy0MBB)
13194 .addReg(MI.getOperand(2).getReg())
13195 .addMBB(thisMBB);
13196 } else if (MI.getOpcode() == PPC::ReadTB) {
13197 // To read the 64-bit time-base register on a 32-bit target, we read the
13198 // two halves. Should the counter have wrapped while it was being read, we
13199 // need to try again.
13200 // ...
13201 // readLoop:
13202 // mfspr Rx,TBU # load from TBU
13203 // mfspr Ry,TB # load from TB
13204 // mfspr Rz,TBU # load from TBU
13205 // cmpw crX,Rx,Rz # check if 'old'='new'
13206 // bne readLoop # branch if they're not equal
13207 // ...
13208
13209 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13210 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13211 DebugLoc dl = MI.getDebugLoc();
13212 F->insert(It, readMBB);
13213 F->insert(It, sinkMBB);
13214
13215 // Transfer the remainder of BB and its successor edges to sinkMBB.
13216 sinkMBB->splice(sinkMBB->begin(), BB,
13217 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13219
13220 BB->addSuccessor(readMBB);
13221 BB = readMBB;
13222
13223 MachineRegisterInfo &RegInfo = F->getRegInfo();
13224 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13225 Register LoReg = MI.getOperand(0).getReg();
13226 Register HiReg = MI.getOperand(1).getReg();
13227
13228 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13229 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13230 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13231
13232 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13233
13234 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13235 .addReg(HiReg)
13236 .addReg(ReadAgainReg);
13237 BuildMI(BB, dl, TII->get(PPC::BCC))
13239 .addReg(CmpReg)
13240 .addMBB(readMBB);
13241
13242 BB->addSuccessor(readMBB);
13243 BB->addSuccessor(sinkMBB);
13244 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13245 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13246 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13247 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13248 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13249 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13250 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13251 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13252
13253 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13254 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13255 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13256 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13257 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13258 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13259 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13260 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13261
13262 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13263 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13264 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13265 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13266 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13267 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13268 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13269 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13270
13271 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13272 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13273 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13274 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13275 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13276 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13277 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13278 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13279
13280 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13281 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13282 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13283 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13284 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13285 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13286 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13287 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13288
13289 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13290 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13291 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13292 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13293 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13294 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13295 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13296 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13297
13298 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13299 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13300 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13301 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13302 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13303 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13304 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13305 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13306
13307 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13308 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13309 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13310 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13311 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13312 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13313 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13314 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13315
13316 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13317 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13318 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13319 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13320 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13321 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13322 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13323 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13324
13325 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13326 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13327 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13328 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13329 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13330 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13331 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13332 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13333
13334 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13335 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13336 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13337 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13338 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13339 BB = EmitAtomicBinary(MI, BB, 4, 0);
13340 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13341 BB = EmitAtomicBinary(MI, BB, 8, 0);
13342 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13343 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13344 (Subtarget.hasPartwordAtomics() &&
13345 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13346 (Subtarget.hasPartwordAtomics() &&
13347 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13348 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13349
13350 auto LoadMnemonic = PPC::LDARX;
13351 auto StoreMnemonic = PPC::STDCX;
13352 switch (MI.getOpcode()) {
13353 default:
13354 llvm_unreachable("Compare and swap of unknown size");
13355 case PPC::ATOMIC_CMP_SWAP_I8:
13356 LoadMnemonic = PPC::LBARX;
13357 StoreMnemonic = PPC::STBCX;
13358 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13359 break;
13360 case PPC::ATOMIC_CMP_SWAP_I16:
13361 LoadMnemonic = PPC::LHARX;
13362 StoreMnemonic = PPC::STHCX;
13363 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13364 break;
13365 case PPC::ATOMIC_CMP_SWAP_I32:
13366 LoadMnemonic = PPC::LWARX;
13367 StoreMnemonic = PPC::STWCX;
13368 break;
13369 case PPC::ATOMIC_CMP_SWAP_I64:
13370 LoadMnemonic = PPC::LDARX;
13371 StoreMnemonic = PPC::STDCX;
13372 break;
13373 }
13374 MachineRegisterInfo &RegInfo = F->getRegInfo();
13375 Register dest = MI.getOperand(0).getReg();
13376 Register ptrA = MI.getOperand(1).getReg();
13377 Register ptrB = MI.getOperand(2).getReg();
13378 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13379 Register oldval = MI.getOperand(3).getReg();
13380 Register newval = MI.getOperand(4).getReg();
13381 DebugLoc dl = MI.getDebugLoc();
13382
13383 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13384 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13385 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13386 F->insert(It, loop1MBB);
13387 F->insert(It, loop2MBB);
13388 F->insert(It, exitMBB);
13389 exitMBB->splice(exitMBB->begin(), BB,
13390 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13392
13393 // thisMBB:
13394 // ...
13395 // fallthrough --> loopMBB
13396 BB->addSuccessor(loop1MBB);
13397
13398 // loop1MBB:
13399 // l[bhwd]arx dest, ptr
13400 // cmp[wd] dest, oldval
13401 // bne- exitBB
13402 // loop2MBB:
13403 // st[bhwd]cx. newval, ptr
13404 // bne- loopMBB
13405 // b exitBB
13406 // exitBB:
13407 BB = loop1MBB;
13408 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13409 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13410 .addReg(dest)
13411 .addReg(oldval);
13412 BuildMI(BB, dl, TII->get(PPC::BCC))
13414 .addReg(CrReg)
13415 .addMBB(exitMBB);
13416 BB->addSuccessor(loop2MBB);
13417 BB->addSuccessor(exitMBB);
13418
13419 BB = loop2MBB;
13420 BuildMI(BB, dl, TII->get(StoreMnemonic))
13421 .addReg(newval)
13422 .addReg(ptrA)
13423 .addReg(ptrB);
13424 BuildMI(BB, dl, TII->get(PPC::BCC))
13426 .addReg(PPC::CR0)
13427 .addMBB(loop1MBB);
13428 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13429 BB->addSuccessor(loop1MBB);
13430 BB->addSuccessor(exitMBB);
13431
13432 // exitMBB:
13433 // ...
13434 BB = exitMBB;
13435 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13436 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13437 // We must use 64-bit registers for addresses when targeting 64-bit,
13438 // since we're actually doing arithmetic on them. Other registers
13439 // can be 32-bit.
13440 bool is64bit = Subtarget.isPPC64();
13441 bool isLittleEndian = Subtarget.isLittleEndian();
13442 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13443
13444 Register dest = MI.getOperand(0).getReg();
13445 Register ptrA = MI.getOperand(1).getReg();
13446 Register ptrB = MI.getOperand(2).getReg();
13447 Register oldval = MI.getOperand(3).getReg();
13448 Register newval = MI.getOperand(4).getReg();
13449 DebugLoc dl = MI.getDebugLoc();
13450
13451 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13452 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13453 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13454 F->insert(It, loop1MBB);
13455 F->insert(It, loop2MBB);
13456 F->insert(It, exitMBB);
13457 exitMBB->splice(exitMBB->begin(), BB,
13458 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13460
13461 MachineRegisterInfo &RegInfo = F->getRegInfo();
13462 const TargetRegisterClass *RC =
13463 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13464 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13465
13466 Register PtrReg = RegInfo.createVirtualRegister(RC);
13467 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13468 Register ShiftReg =
13469 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13470 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13471 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13472 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13473 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13474 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13475 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13476 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13477 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13478 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13479 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13480 Register Ptr1Reg;
13481 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13482 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13483 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13484 // thisMBB:
13485 // ...
13486 // fallthrough --> loopMBB
13487 BB->addSuccessor(loop1MBB);
13488
13489 // The 4-byte load must be aligned, while a char or short may be
13490 // anywhere in the word. Hence all this nasty bookkeeping code.
13491 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13492 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13493 // xori shift, shift1, 24 [16]
13494 // rlwinm ptr, ptr1, 0, 0, 29
13495 // slw newval2, newval, shift
13496 // slw oldval2, oldval,shift
13497 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13498 // slw mask, mask2, shift
13499 // and newval3, newval2, mask
13500 // and oldval3, oldval2, mask
13501 // loop1MBB:
13502 // lwarx tmpDest, ptr
13503 // and tmp, tmpDest, mask
13504 // cmpw tmp, oldval3
13505 // bne- exitBB
13506 // loop2MBB:
13507 // andc tmp2, tmpDest, mask
13508 // or tmp4, tmp2, newval3
13509 // stwcx. tmp4, ptr
13510 // bne- loop1MBB
13511 // b exitBB
13512 // exitBB:
13513 // srw dest, tmpDest, shift
13514 if (ptrA != ZeroReg) {
13515 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13516 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13517 .addReg(ptrA)
13518 .addReg(ptrB);
13519 } else {
13520 Ptr1Reg = ptrB;
13521 }
13522
13523 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13524 // mode.
13525 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13526 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13527 .addImm(3)
13528 .addImm(27)
13529 .addImm(is8bit ? 28 : 27);
13530 if (!isLittleEndian)
13531 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13532 .addReg(Shift1Reg)
13533 .addImm(is8bit ? 24 : 16);
13534 if (is64bit)
13535 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13536 .addReg(Ptr1Reg)
13537 .addImm(0)
13538 .addImm(61);
13539 else
13540 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13541 .addReg(Ptr1Reg)
13542 .addImm(0)
13543 .addImm(0)
13544 .addImm(29);
13545 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13546 .addReg(newval)
13547 .addReg(ShiftReg);
13548 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13549 .addReg(oldval)
13550 .addReg(ShiftReg);
13551 if (is8bit)
13552 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13553 else {
13554 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13555 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13556 .addReg(Mask3Reg)
13557 .addImm(65535);
13558 }
13559 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13560 .addReg(Mask2Reg)
13561 .addReg(ShiftReg);
13562 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13563 .addReg(NewVal2Reg)
13564 .addReg(MaskReg);
13565 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13566 .addReg(OldVal2Reg)
13567 .addReg(MaskReg);
13568
13569 BB = loop1MBB;
13570 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13571 .addReg(ZeroReg)
13572 .addReg(PtrReg);
13573 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13574 .addReg(TmpDestReg)
13575 .addReg(MaskReg);
13576 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13577 .addReg(TmpReg)
13578 .addReg(OldVal3Reg);
13579 BuildMI(BB, dl, TII->get(PPC::BCC))
13581 .addReg(CrReg)
13582 .addMBB(exitMBB);
13583 BB->addSuccessor(loop2MBB);
13584 BB->addSuccessor(exitMBB);
13585
13586 BB = loop2MBB;
13587 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13588 .addReg(TmpDestReg)
13589 .addReg(MaskReg);
13590 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13591 .addReg(Tmp2Reg)
13592 .addReg(NewVal3Reg);
13593 BuildMI(BB, dl, TII->get(PPC::STWCX))
13594 .addReg(Tmp4Reg)
13595 .addReg(ZeroReg)
13596 .addReg(PtrReg);
13597 BuildMI(BB, dl, TII->get(PPC::BCC))
13599 .addReg(PPC::CR0)
13600 .addMBB(loop1MBB);
13601 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13602 BB->addSuccessor(loop1MBB);
13603 BB->addSuccessor(exitMBB);
13604
13605 // exitMBB:
13606 // ...
13607 BB = exitMBB;
13608 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13609 .addReg(TmpReg)
13610 .addReg(ShiftReg);
13611 } else if (MI.getOpcode() == PPC::FADDrtz) {
13612 // This pseudo performs an FADD with rounding mode temporarily forced
13613 // to round-to-zero. We emit this via custom inserter since the FPSCR
13614 // is not modeled at the SelectionDAG level.
13615 Register Dest = MI.getOperand(0).getReg();
13616 Register Src1 = MI.getOperand(1).getReg();
13617 Register Src2 = MI.getOperand(2).getReg();
13618 DebugLoc dl = MI.getDebugLoc();
13619
13620 MachineRegisterInfo &RegInfo = F->getRegInfo();
13621 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13622
13623 // Save FPSCR value.
13624 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13625
13626 // Set rounding mode to round-to-zero.
13627 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13628 .addImm(31)
13630
13631 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13632 .addImm(30)
13634
13635 // Perform addition.
13636 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13637 .addReg(Src1)
13638 .addReg(Src2);
13639 if (MI.getFlag(MachineInstr::NoFPExcept))
13641
13642 // Restore FPSCR value.
13643 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13644 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13645 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13646 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13647 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13648 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13649 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13650 ? PPC::ANDI8_rec
13651 : PPC::ANDI_rec;
13652 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13653 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13654
13655 MachineRegisterInfo &RegInfo = F->getRegInfo();
13656 Register Dest = RegInfo.createVirtualRegister(
13657 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13658
13659 DebugLoc Dl = MI.getDebugLoc();
13660 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13661 .addReg(MI.getOperand(1).getReg())
13662 .addImm(1);
13663 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13664 MI.getOperand(0).getReg())
13665 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13666 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13667 DebugLoc Dl = MI.getDebugLoc();
13668 MachineRegisterInfo &RegInfo = F->getRegInfo();
13669 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13670 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13671 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13672 MI.getOperand(0).getReg())
13673 .addReg(CRReg);
13674 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13675 DebugLoc Dl = MI.getDebugLoc();
13676 unsigned Imm = MI.getOperand(1).getImm();
13677 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13678 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13679 MI.getOperand(0).getReg())
13680 .addReg(PPC::CR0EQ);
13681 } else if (MI.getOpcode() == PPC::SETRNDi) {
13682 DebugLoc dl = MI.getDebugLoc();
13683 Register OldFPSCRReg = MI.getOperand(0).getReg();
13684
13685 // Save FPSCR value.
13686 if (MRI.use_empty(OldFPSCRReg))
13687 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13688 else
13689 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13690
13691 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
13692 // the following settings:
13693 // 00 Round to nearest
13694 // 01 Round to 0
13695 // 10 Round to +inf
13696 // 11 Round to -inf
13697
13698 // When the operand is immediate, using the two least significant bits of
13699 // the immediate to set the bits 62:63 of FPSCR.
13700 unsigned Mode = MI.getOperand(1).getImm();
13701 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13702 .addImm(31)
13704
13705 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13706 .addImm(30)
13708 } else if (MI.getOpcode() == PPC::SETRND) {
13709 DebugLoc dl = MI.getDebugLoc();
13710
13711 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13712 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13713 // If the target doesn't have DirectMove, we should use stack to do the
13714 // conversion, because the target doesn't have the instructions like mtvsrd
13715 // or mfvsrd to do this conversion directly.
13716 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13717 if (Subtarget.hasDirectMove()) {
13718 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13719 .addReg(SrcReg);
13720 } else {
13721 // Use stack to do the register copy.
13722 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13723 MachineRegisterInfo &RegInfo = F->getRegInfo();
13724 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13725 if (RC == &PPC::F8RCRegClass) {
13726 // Copy register from F8RCRegClass to G8RCRegclass.
13727 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13728 "Unsupported RegClass.");
13729
13730 StoreOp = PPC::STFD;
13731 LoadOp = PPC::LD;
13732 } else {
13733 // Copy register from G8RCRegClass to F8RCRegclass.
13734 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13735 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13736 "Unsupported RegClass.");
13737 }
13738
13739 MachineFrameInfo &MFI = F->getFrameInfo();
13740 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13741
13742 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13743 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13745 MFI.getObjectAlign(FrameIdx));
13746
13747 // Store the SrcReg into the stack.
13748 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13749 .addReg(SrcReg)
13750 .addImm(0)
13751 .addFrameIndex(FrameIdx)
13752 .addMemOperand(MMOStore);
13753
13754 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13755 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13757 MFI.getObjectAlign(FrameIdx));
13758
13759 // Load from the stack where SrcReg is stored, and save to DestReg,
13760 // so we have done the RegClass conversion from RegClass::SrcReg to
13761 // RegClass::DestReg.
13762 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13763 .addImm(0)
13764 .addFrameIndex(FrameIdx)
13765 .addMemOperand(MMOLoad);
13766 }
13767 };
13768
13769 Register OldFPSCRReg = MI.getOperand(0).getReg();
13770
13771 // Save FPSCR value.
13772 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13773
13774 // When the operand is gprc register, use two least significant bits of the
13775 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
13776 //
13777 // copy OldFPSCRTmpReg, OldFPSCRReg
13778 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13779 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13780 // copy NewFPSCRReg, NewFPSCRTmpReg
13781 // mtfsf 255, NewFPSCRReg
13782 MachineOperand SrcOp = MI.getOperand(1);
13783 MachineRegisterInfo &RegInfo = F->getRegInfo();
13784 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13785
13786 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13787
13788 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13789 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13790
13791 // The first operand of INSERT_SUBREG should be a register which has
13792 // subregisters, we only care about its RegClass, so we should use an
13793 // IMPLICIT_DEF register.
13794 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13795 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13796 .addReg(ImDefReg)
13797 .add(SrcOp)
13798 .addImm(1);
13799
13800 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13801 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13802 .addReg(OldFPSCRTmpReg)
13803 .addReg(ExtSrcReg)
13804 .addImm(0)
13805 .addImm(62);
13806
13807 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13808 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13809
13810 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
13811 // bits of FPSCR.
13812 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13813 .addImm(255)
13814 .addReg(NewFPSCRReg)
13815 .addImm(0)
13816 .addImm(0);
13817 } else if (MI.getOpcode() == PPC::SETFLM) {
13818 DebugLoc Dl = MI.getDebugLoc();
13819
13820 // Result of setflm is previous FPSCR content, so we need to save it first.
13821 Register OldFPSCRReg = MI.getOperand(0).getReg();
13822 if (MRI.use_empty(OldFPSCRReg))
13823 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13824 else
13825 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13826
13827 // Put bits in 32:63 to FPSCR.
13828 Register NewFPSCRReg = MI.getOperand(1).getReg();
13829 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13830 .addImm(255)
13831 .addReg(NewFPSCRReg)
13832 .addImm(0)
13833 .addImm(0);
13834 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13835 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13836 return emitProbedAlloca(MI, BB);
13837 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13838 DebugLoc DL = MI.getDebugLoc();
13839 Register Src = MI.getOperand(2).getReg();
13840 Register Lo = MI.getOperand(0).getReg();
13841 Register Hi = MI.getOperand(1).getReg();
13842 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13843 .addDef(Lo)
13844 .addUse(Src, 0, PPC::sub_gp8_x1);
13845 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13846 .addDef(Hi)
13847 .addUse(Src, 0, PPC::sub_gp8_x0);
13848 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13849 MI.getOpcode() == PPC::STQX_PSEUDO) {
13850 DebugLoc DL = MI.getDebugLoc();
13851 // Ptr is used as the ptr_rc_no_r0 part
13852 // of LQ/STQ's memory operand and adding result of RA and RB,
13853 // so it has to be g8rc_and_g8rc_nox0.
13854 Register Ptr =
13855 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13856 Register Val = MI.getOperand(0).getReg();
13857 Register RA = MI.getOperand(1).getReg();
13858 Register RB = MI.getOperand(2).getReg();
13859 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13860 BuildMI(*BB, MI, DL,
13861 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13862 : TII->get(PPC::STQ))
13863 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13864 .addImm(0)
13865 .addReg(Ptr);
13866 } else {
13867 llvm_unreachable("Unexpected instr type to insert");
13868 }
13869
13870 MI.eraseFromParent(); // The pseudo instruction is gone now.
13871 return BB;
13872}
13873
13874//===----------------------------------------------------------------------===//
13875// Target Optimization Hooks
13876//===----------------------------------------------------------------------===//
13877
13878static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13879 // For the estimates, convergence is quadratic, so we essentially double the
13880 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13881 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13882 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13883 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13884 if (VT.getScalarType() == MVT::f64)
13885 RefinementSteps++;
13886 return RefinementSteps;
13887}
13888
13889SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13890 const DenormalMode &Mode) const {
13891 // We only have VSX Vector Test for software Square Root.
13892 EVT VT = Op.getValueType();
13893 if (!isTypeLegal(MVT::i1) ||
13894 (VT != MVT::f64 &&
13895 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13896 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13897
13898 SDLoc DL(Op);
13899 // The output register of FTSQRT is CR field.
13900 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13901 // ftsqrt BF,FRB
13902 // Let e_b be the unbiased exponent of the double-precision
13903 // floating-point operand in register FRB.
13904 // fe_flag is set to 1 if either of the following conditions occurs.
13905 // - The double-precision floating-point operand in register FRB is a zero,
13906 // a NaN, or an infinity, or a negative value.
13907 // - e_b is less than or equal to -970.
13908 // Otherwise fe_flag is set to 0.
13909 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13910 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13911 // exponent is less than -970)
13912 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13913 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13914 FTSQRT, SRIdxVal),
13915 0);
13916}
13917
13918SDValue
13919PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13920 SelectionDAG &DAG) const {
13921 // We only have VSX Vector Square Root.
13922 EVT VT = Op.getValueType();
13923 if (VT != MVT::f64 &&
13924 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13926
13927 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13928}
13929
13930SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13931 int Enabled, int &RefinementSteps,
13932 bool &UseOneConstNR,
13933 bool Reciprocal) const {
13934 EVT VT = Operand.getValueType();
13935 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13936 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13937 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13938 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13939 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13940 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13941
13942 // The Newton-Raphson computation with a single constant does not provide
13943 // enough accuracy on some CPUs.
13944 UseOneConstNR = !Subtarget.needsTwoConstNR();
13945 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13946 }
13947 return SDValue();
13948}
13949
13950SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13951 int Enabled,
13952 int &RefinementSteps) const {
13953 EVT VT = Operand.getValueType();
13954 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13955 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13956 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13957 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13958 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13959 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13960 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13961 }
13962 return SDValue();
13963}
13964
13965unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13966 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13967 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13968 // enabled for division), this functionality is redundant with the default
13969 // combiner logic (once the division -> reciprocal/multiply transformation
13970 // has taken place). As a result, this matters more for older cores than for
13971 // newer ones.
13972
13973 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13974 // reciprocal if there are two or more FDIVs (for embedded cores with only
13975 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13976 switch (Subtarget.getCPUDirective()) {
13977 default:
13978 return 3;
13979 case PPC::DIR_440:
13980 case PPC::DIR_A2:
13981 case PPC::DIR_E500:
13982 case PPC::DIR_E500mc:
13983 case PPC::DIR_E5500:
13984 return 2;
13985 }
13986}
13987
13988// isConsecutiveLSLoc needs to work even if all adds have not yet been
13989// collapsed, and so we need to look through chains of them.
13991 int64_t& Offset, SelectionDAG &DAG) {
13992 if (DAG.isBaseWithConstantOffset(Loc)) {
13993 Base = Loc.getOperand(0);
13994 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13995
13996 // The base might itself be a base plus an offset, and if so, accumulate
13997 // that as well.
13998 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13999 }
14000}
14001
14003 unsigned Bytes, int Dist,
14004 SelectionDAG &DAG) {
14005 if (VT.getSizeInBits() / 8 != Bytes)
14006 return false;
14007
14008 SDValue BaseLoc = Base->getBasePtr();
14009 if (Loc.getOpcode() == ISD::FrameIndex) {
14010 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14011 return false;
14013 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14014 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14015 int FS = MFI.getObjectSize(FI);
14016 int BFS = MFI.getObjectSize(BFI);
14017 if (FS != BFS || FS != (int)Bytes) return false;
14018 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14019 }
14020
14021 SDValue Base1 = Loc, Base2 = BaseLoc;
14022 int64_t Offset1 = 0, Offset2 = 0;
14023 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14024 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14025 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14026 return true;
14027
14028 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14029 const GlobalValue *GV1 = nullptr;
14030 const GlobalValue *GV2 = nullptr;
14031 Offset1 = 0;
14032 Offset2 = 0;
14033 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14034 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14035 if (isGA1 && isGA2 && GV1 == GV2)
14036 return Offset1 == (Offset2 + Dist*Bytes);
14037 return false;
14038}
14039
14040// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14041// not enforce equality of the chain operands.
14043 unsigned Bytes, int Dist,
14044 SelectionDAG &DAG) {
14046 EVT VT = LS->getMemoryVT();
14047 SDValue Loc = LS->getBasePtr();
14048 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14049 }
14050
14051 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14052 EVT VT;
14053 switch (N->getConstantOperandVal(1)) {
14054 default: return false;
14055 case Intrinsic::ppc_altivec_lvx:
14056 case Intrinsic::ppc_altivec_lvxl:
14057 case Intrinsic::ppc_vsx_lxvw4x:
14058 case Intrinsic::ppc_vsx_lxvw4x_be:
14059 VT = MVT::v4i32;
14060 break;
14061 case Intrinsic::ppc_vsx_lxvd2x:
14062 case Intrinsic::ppc_vsx_lxvd2x_be:
14063 VT = MVT::v2f64;
14064 break;
14065 case Intrinsic::ppc_altivec_lvebx:
14066 VT = MVT::i8;
14067 break;
14068 case Intrinsic::ppc_altivec_lvehx:
14069 VT = MVT::i16;
14070 break;
14071 case Intrinsic::ppc_altivec_lvewx:
14072 VT = MVT::i32;
14073 break;
14074 }
14075
14076 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14077 }
14078
14079 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14080 EVT VT;
14081 switch (N->getConstantOperandVal(1)) {
14082 default: return false;
14083 case Intrinsic::ppc_altivec_stvx:
14084 case Intrinsic::ppc_altivec_stvxl:
14085 case Intrinsic::ppc_vsx_stxvw4x:
14086 VT = MVT::v4i32;
14087 break;
14088 case Intrinsic::ppc_vsx_stxvd2x:
14089 VT = MVT::v2f64;
14090 break;
14091 case Intrinsic::ppc_vsx_stxvw4x_be:
14092 VT = MVT::v4i32;
14093 break;
14094 case Intrinsic::ppc_vsx_stxvd2x_be:
14095 VT = MVT::v2f64;
14096 break;
14097 case Intrinsic::ppc_altivec_stvebx:
14098 VT = MVT::i8;
14099 break;
14100 case Intrinsic::ppc_altivec_stvehx:
14101 VT = MVT::i16;
14102 break;
14103 case Intrinsic::ppc_altivec_stvewx:
14104 VT = MVT::i32;
14105 break;
14106 }
14107
14108 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14109 }
14110
14111 return false;
14112}
14113
14114// Return true is there is a nearyby consecutive load to the one provided
14115// (regardless of alignment). We search up and down the chain, looking though
14116// token factors and other loads (but nothing else). As a result, a true result
14117// indicates that it is safe to create a new consecutive load adjacent to the
14118// load provided.
14120 SDValue Chain = LD->getChain();
14121 EVT VT = LD->getMemoryVT();
14122
14123 SmallSet<SDNode *, 16> LoadRoots;
14124 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14125 SmallSet<SDNode *, 16> Visited;
14126
14127 // First, search up the chain, branching to follow all token-factor operands.
14128 // If we find a consecutive load, then we're done, otherwise, record all
14129 // nodes just above the top-level loads and token factors.
14130 while (!Queue.empty()) {
14131 SDNode *ChainNext = Queue.pop_back_val();
14132 if (!Visited.insert(ChainNext).second)
14133 continue;
14134
14135 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14136 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14137 return true;
14138
14139 if (!Visited.count(ChainLD->getChain().getNode()))
14140 Queue.push_back(ChainLD->getChain().getNode());
14141 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14142 for (const SDUse &O : ChainNext->ops())
14143 if (!Visited.count(O.getNode()))
14144 Queue.push_back(O.getNode());
14145 } else
14146 LoadRoots.insert(ChainNext);
14147 }
14148
14149 // Second, search down the chain, starting from the top-level nodes recorded
14150 // in the first phase. These top-level nodes are the nodes just above all
14151 // loads and token factors. Starting with their uses, recursively look though
14152 // all loads (just the chain uses) and token factors to find a consecutive
14153 // load.
14154 Visited.clear();
14155 Queue.clear();
14156
14157 for (SDNode *I : LoadRoots) {
14158 Queue.push_back(I);
14159
14160 while (!Queue.empty()) {
14161 SDNode *LoadRoot = Queue.pop_back_val();
14162 if (!Visited.insert(LoadRoot).second)
14163 continue;
14164
14165 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14166 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14167 return true;
14168
14169 for (SDNode *U : LoadRoot->uses())
14170 if (((isa<MemSDNode>(U) &&
14171 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14172 U->getOpcode() == ISD::TokenFactor) &&
14173 !Visited.count(U))
14174 Queue.push_back(U);
14175 }
14176 }
14177
14178 return false;
14179}
14180
14181/// This function is called when we have proved that a SETCC node can be replaced
14182/// by subtraction (and other supporting instructions) so that the result of
14183/// comparison is kept in a GPR instead of CR. This function is purely for
14184/// codegen purposes and has some flags to guide the codegen process.
14185static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14186 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14187 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14188
14189 // Zero extend the operands to the largest legal integer. Originally, they
14190 // must be of a strictly smaller size.
14191 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14192 DAG.getConstant(Size, DL, MVT::i32));
14193 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14194 DAG.getConstant(Size, DL, MVT::i32));
14195
14196 // Swap if needed. Depends on the condition code.
14197 if (Swap)
14198 std::swap(Op0, Op1);
14199
14200 // Subtract extended integers.
14201 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14202
14203 // Move the sign bit to the least significant position and zero out the rest.
14204 // Now the least significant bit carries the result of original comparison.
14205 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14206 DAG.getConstant(Size - 1, DL, MVT::i32));
14207 auto Final = Shifted;
14208
14209 // Complement the result if needed. Based on the condition code.
14210 if (Complement)
14211 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14212 DAG.getConstant(1, DL, MVT::i64));
14213
14214 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14215}
14216
14217SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14218 DAGCombinerInfo &DCI) const {
14219 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14220
14221 SelectionDAG &DAG = DCI.DAG;
14222 SDLoc DL(N);
14223
14224 // Size of integers being compared has a critical role in the following
14225 // analysis, so we prefer to do this when all types are legal.
14226 if (!DCI.isAfterLegalizeDAG())
14227 return SDValue();
14228
14229 // If all users of SETCC extend its value to a legal integer type
14230 // then we replace SETCC with a subtraction
14231 for (const SDNode *U : N->uses())
14232 if (U->getOpcode() != ISD::ZERO_EXTEND)
14233 return SDValue();
14234
14235 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14236 auto OpSize = N->getOperand(0).getValueSizeInBits();
14237
14239
14240 if (OpSize < Size) {
14241 switch (CC) {
14242 default: break;
14243 case ISD::SETULT:
14244 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14245 case ISD::SETULE:
14246 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14247 case ISD::SETUGT:
14248 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14249 case ISD::SETUGE:
14250 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14251 }
14252 }
14253
14254 return SDValue();
14255}
14256
14257SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14258 DAGCombinerInfo &DCI) const {
14259 SelectionDAG &DAG = DCI.DAG;
14260 SDLoc dl(N);
14261
14262 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14263 // If we're tracking CR bits, we need to be careful that we don't have:
14264 // trunc(binary-ops(zext(x), zext(y)))
14265 // or
14266 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14267 // such that we're unnecessarily moving things into GPRs when it would be
14268 // better to keep them in CR bits.
14269
14270 // Note that trunc here can be an actual i1 trunc, or can be the effective
14271 // truncation that comes from a setcc or select_cc.
14272 if (N->getOpcode() == ISD::TRUNCATE &&
14273 N->getValueType(0) != MVT::i1)
14274 return SDValue();
14275
14276 if (N->getOperand(0).getValueType() != MVT::i32 &&
14277 N->getOperand(0).getValueType() != MVT::i64)
14278 return SDValue();
14279
14280 if (N->getOpcode() == ISD::SETCC ||
14281 N->getOpcode() == ISD::SELECT_CC) {
14282 // If we're looking at a comparison, then we need to make sure that the
14283 // high bits (all except for the first) don't matter the result.
14285 cast<CondCodeSDNode>(N->getOperand(
14286 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14287 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14288
14290 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14291 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14292 return SDValue();
14293 } else if (ISD::isUnsignedIntSetCC(CC)) {
14294 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14295 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14296 !DAG.MaskedValueIsZero(N->getOperand(1),
14297 APInt::getHighBitsSet(OpBits, OpBits-1)))
14298 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14299 : SDValue());
14300 } else {
14301 // This is neither a signed nor an unsigned comparison, just make sure
14302 // that the high bits are equal.
14303 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14304 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14305
14306 // We don't really care about what is known about the first bit (if
14307 // anything), so pretend that it is known zero for both to ensure they can
14308 // be compared as constants.
14309 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14310 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14311
14312 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14313 Op1Known.getConstant() != Op2Known.getConstant())
14314 return SDValue();
14315 }
14316 }
14317
14318 // We now know that the higher-order bits are irrelevant, we just need to
14319 // make sure that all of the intermediate operations are bit operations, and
14320 // all inputs are extensions.
14321 if (N->getOperand(0).getOpcode() != ISD::AND &&
14322 N->getOperand(0).getOpcode() != ISD::OR &&
14323 N->getOperand(0).getOpcode() != ISD::XOR &&
14324 N->getOperand(0).getOpcode() != ISD::SELECT &&
14325 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14326 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14327 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14328 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14329 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14330 return SDValue();
14331
14332 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14333 N->getOperand(1).getOpcode() != ISD::AND &&
14334 N->getOperand(1).getOpcode() != ISD::OR &&
14335 N->getOperand(1).getOpcode() != ISD::XOR &&
14336 N->getOperand(1).getOpcode() != ISD::SELECT &&
14337 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14338 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14339 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14340 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14341 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14342 return SDValue();
14343
14345 SmallVector<SDValue, 8> BinOps, PromOps;
14347
14348 for (unsigned i = 0; i < 2; ++i) {
14349 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14350 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14351 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14352 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14353 isa<ConstantSDNode>(N->getOperand(i)))
14354 Inputs.push_back(N->getOperand(i));
14355 else
14356 BinOps.push_back(N->getOperand(i));
14357
14358 if (N->getOpcode() == ISD::TRUNCATE)
14359 break;
14360 }
14361
14362 // Visit all inputs, collect all binary operations (and, or, xor and
14363 // select) that are all fed by extensions.
14364 while (!BinOps.empty()) {
14365 SDValue BinOp = BinOps.pop_back_val();
14366
14367 if (!Visited.insert(BinOp.getNode()).second)
14368 continue;
14369
14370 PromOps.push_back(BinOp);
14371
14372 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14373 // The condition of the select is not promoted.
14374 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14375 continue;
14376 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14377 continue;
14378
14379 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14380 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14381 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14382 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14383 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14384 Inputs.push_back(BinOp.getOperand(i));
14385 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14386 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14387 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14388 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14389 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14390 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14391 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14392 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14393 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14394 BinOps.push_back(BinOp.getOperand(i));
14395 } else {
14396 // We have an input that is not an extension or another binary
14397 // operation; we'll abort this transformation.
14398 return SDValue();
14399 }
14400 }
14401 }
14402
14403 // Make sure that this is a self-contained cluster of operations (which
14404 // is not quite the same thing as saying that everything has only one
14405 // use).
14406 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14407 if (isa<ConstantSDNode>(Inputs[i]))
14408 continue;
14409
14410 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14411 if (User != N && !Visited.count(User))
14412 return SDValue();
14413
14414 // Make sure that we're not going to promote the non-output-value
14415 // operand(s) or SELECT or SELECT_CC.
14416 // FIXME: Although we could sometimes handle this, and it does occur in
14417 // practice that one of the condition inputs to the select is also one of
14418 // the outputs, we currently can't deal with this.
14419 if (User->getOpcode() == ISD::SELECT) {
14420 if (User->getOperand(0) == Inputs[i])
14421 return SDValue();
14422 } else if (User->getOpcode() == ISD::SELECT_CC) {
14423 if (User->getOperand(0) == Inputs[i] ||
14424 User->getOperand(1) == Inputs[i])
14425 return SDValue();
14426 }
14427 }
14428 }
14429
14430 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14431 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14432 if (User != N && !Visited.count(User))
14433 return SDValue();
14434
14435 // Make sure that we're not going to promote the non-output-value
14436 // operand(s) or SELECT or SELECT_CC.
14437 // FIXME: Although we could sometimes handle this, and it does occur in
14438 // practice that one of the condition inputs to the select is also one of
14439 // the outputs, we currently can't deal with this.
14440 if (User->getOpcode() == ISD::SELECT) {
14441 if (User->getOperand(0) == PromOps[i])
14442 return SDValue();
14443 } else if (User->getOpcode() == ISD::SELECT_CC) {
14444 if (User->getOperand(0) == PromOps[i] ||
14445 User->getOperand(1) == PromOps[i])
14446 return SDValue();
14447 }
14448 }
14449 }
14450
14451 // Replace all inputs with the extension operand.
14452 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14453 // Constants may have users outside the cluster of to-be-promoted nodes,
14454 // and so we need to replace those as we do the promotions.
14455 if (isa<ConstantSDNode>(Inputs[i]))
14456 continue;
14457 else
14458 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14459 }
14460
14461 std::list<HandleSDNode> PromOpHandles;
14462 for (auto &PromOp : PromOps)
14463 PromOpHandles.emplace_back(PromOp);
14464
14465 // Replace all operations (these are all the same, but have a different
14466 // (i1) return type). DAG.getNode will validate that the types of
14467 // a binary operator match, so go through the list in reverse so that
14468 // we've likely promoted both operands first. Any intermediate truncations or
14469 // extensions disappear.
14470 while (!PromOpHandles.empty()) {
14471 SDValue PromOp = PromOpHandles.back().getValue();
14472 PromOpHandles.pop_back();
14473
14474 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14475 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14476 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14477 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14478 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14479 PromOp.getOperand(0).getValueType() != MVT::i1) {
14480 // The operand is not yet ready (see comment below).
14481 PromOpHandles.emplace_front(PromOp);
14482 continue;
14483 }
14484
14485 SDValue RepValue = PromOp.getOperand(0);
14486 if (isa<ConstantSDNode>(RepValue))
14487 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14488
14489 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14490 continue;
14491 }
14492
14493 unsigned C;
14494 switch (PromOp.getOpcode()) {
14495 default: C = 0; break;
14496 case ISD::SELECT: C = 1; break;
14497 case ISD::SELECT_CC: C = 2; break;
14498 }
14499
14500 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14501 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14502 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14503 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14504 // The to-be-promoted operands of this node have not yet been
14505 // promoted (this should be rare because we're going through the
14506 // list backward, but if one of the operands has several users in
14507 // this cluster of to-be-promoted nodes, it is possible).
14508 PromOpHandles.emplace_front(PromOp);
14509 continue;
14510 }
14511
14512 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14513
14514 // If there are any constant inputs, make sure they're replaced now.
14515 for (unsigned i = 0; i < 2; ++i)
14516 if (isa<ConstantSDNode>(Ops[C+i]))
14517 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14518
14519 DAG.ReplaceAllUsesOfValueWith(PromOp,
14520 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14521 }
14522
14523 // Now we're left with the initial truncation itself.
14524 if (N->getOpcode() == ISD::TRUNCATE)
14525 return N->getOperand(0);
14526
14527 // Otherwise, this is a comparison. The operands to be compared have just
14528 // changed type (to i1), but everything else is the same.
14529 return SDValue(N, 0);
14530}
14531
14532SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14533 DAGCombinerInfo &DCI) const {
14534 SelectionDAG &DAG = DCI.DAG;
14535 SDLoc dl(N);
14536
14537 // If we're tracking CR bits, we need to be careful that we don't have:
14538 // zext(binary-ops(trunc(x), trunc(y)))
14539 // or
14540 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14541 // such that we're unnecessarily moving things into CR bits that can more
14542 // efficiently stay in GPRs. Note that if we're not certain that the high
14543 // bits are set as required by the final extension, we still may need to do
14544 // some masking to get the proper behavior.
14545
14546 // This same functionality is important on PPC64 when dealing with
14547 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14548 // the return values of functions. Because it is so similar, it is handled
14549 // here as well.
14550
14551 if (N->getValueType(0) != MVT::i32 &&
14552 N->getValueType(0) != MVT::i64)
14553 return SDValue();
14554
14555 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14556 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14557 return SDValue();
14558
14559 if (N->getOperand(0).getOpcode() != ISD::AND &&
14560 N->getOperand(0).getOpcode() != ISD::OR &&
14561 N->getOperand(0).getOpcode() != ISD::XOR &&
14562 N->getOperand(0).getOpcode() != ISD::SELECT &&
14563 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14564 return SDValue();
14565
14567 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14569
14570 // Visit all inputs, collect all binary operations (and, or, xor and
14571 // select) that are all fed by truncations.
14572 while (!BinOps.empty()) {
14573 SDValue BinOp = BinOps.pop_back_val();
14574
14575 if (!Visited.insert(BinOp.getNode()).second)
14576 continue;
14577
14578 PromOps.push_back(BinOp);
14579
14580 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14581 // The condition of the select is not promoted.
14582 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14583 continue;
14584 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14585 continue;
14586
14587 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14588 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14589 Inputs.push_back(BinOp.getOperand(i));
14590 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14591 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14592 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14593 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14594 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14595 BinOps.push_back(BinOp.getOperand(i));
14596 } else {
14597 // We have an input that is not a truncation or another binary
14598 // operation; we'll abort this transformation.
14599 return SDValue();
14600 }
14601 }
14602 }
14603
14604 // The operands of a select that must be truncated when the select is
14605 // promoted because the operand is actually part of the to-be-promoted set.
14606 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14607
14608 // Make sure that this is a self-contained cluster of operations (which
14609 // is not quite the same thing as saying that everything has only one
14610 // use).
14611 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14612 if (isa<ConstantSDNode>(Inputs[i]))
14613 continue;
14614
14615 for (SDNode *User : Inputs[i].getNode()->uses()) {
14616 if (User != N && !Visited.count(User))
14617 return SDValue();
14618
14619 // If we're going to promote the non-output-value operand(s) or SELECT or
14620 // SELECT_CC, record them for truncation.
14621 if (User->getOpcode() == ISD::SELECT) {
14622 if (User->getOperand(0) == Inputs[i])
14623 SelectTruncOp[0].insert(std::make_pair(User,
14624 User->getOperand(0).getValueType()));
14625 } else if (User->getOpcode() == ISD::SELECT_CC) {
14626 if (User->getOperand(0) == Inputs[i])
14627 SelectTruncOp[0].insert(std::make_pair(User,
14628 User->getOperand(0).getValueType()));
14629 if (User->getOperand(1) == Inputs[i])
14630 SelectTruncOp[1].insert(std::make_pair(User,
14631 User->getOperand(1).getValueType()));
14632 }
14633 }
14634 }
14635
14636 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14637 for (SDNode *User : PromOps[i].getNode()->uses()) {
14638 if (User != N && !Visited.count(User))
14639 return SDValue();
14640
14641 // If we're going to promote the non-output-value operand(s) or SELECT or
14642 // SELECT_CC, record them for truncation.
14643 if (User->getOpcode() == ISD::SELECT) {
14644 if (User->getOperand(0) == PromOps[i])
14645 SelectTruncOp[0].insert(std::make_pair(User,
14646 User->getOperand(0).getValueType()));
14647 } else if (User->getOpcode() == ISD::SELECT_CC) {
14648 if (User->getOperand(0) == PromOps[i])
14649 SelectTruncOp[0].insert(std::make_pair(User,
14650 User->getOperand(0).getValueType()));
14651 if (User->getOperand(1) == PromOps[i])
14652 SelectTruncOp[1].insert(std::make_pair(User,
14653 User->getOperand(1).getValueType()));
14654 }
14655 }
14656 }
14657
14658 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14659 bool ReallyNeedsExt = false;
14660 if (N->getOpcode() != ISD::ANY_EXTEND) {
14661 // If all of the inputs are not already sign/zero extended, then
14662 // we'll still need to do that at the end.
14663 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14664 if (isa<ConstantSDNode>(Inputs[i]))
14665 continue;
14666
14667 unsigned OpBits =
14668 Inputs[i].getOperand(0).getValueSizeInBits();
14669 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14670
14671 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14672 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14673 APInt::getHighBitsSet(OpBits,
14674 OpBits-PromBits))) ||
14675 (N->getOpcode() == ISD::SIGN_EXTEND &&
14676 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14677 (OpBits-(PromBits-1)))) {
14678 ReallyNeedsExt = true;
14679 break;
14680 }
14681 }
14682 }
14683
14684 // Replace all inputs, either with the truncation operand, or a
14685 // truncation or extension to the final output type.
14686 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14687 // Constant inputs need to be replaced with the to-be-promoted nodes that
14688 // use them because they might have users outside of the cluster of
14689 // promoted nodes.
14690 if (isa<ConstantSDNode>(Inputs[i]))
14691 continue;
14692
14693 SDValue InSrc = Inputs[i].getOperand(0);
14694 if (Inputs[i].getValueType() == N->getValueType(0))
14695 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14696 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14697 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14698 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14699 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14700 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14701 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14702 else
14703 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14704 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14705 }
14706
14707 std::list<HandleSDNode> PromOpHandles;
14708 for (auto &PromOp : PromOps)
14709 PromOpHandles.emplace_back(PromOp);
14710
14711 // Replace all operations (these are all the same, but have a different
14712 // (promoted) return type). DAG.getNode will validate that the types of
14713 // a binary operator match, so go through the list in reverse so that
14714 // we've likely promoted both operands first.
14715 while (!PromOpHandles.empty()) {
14716 SDValue PromOp = PromOpHandles.back().getValue();
14717 PromOpHandles.pop_back();
14718
14719 unsigned C;
14720 switch (PromOp.getOpcode()) {
14721 default: C = 0; break;
14722 case ISD::SELECT: C = 1; break;
14723 case ISD::SELECT_CC: C = 2; break;
14724 }
14725
14726 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14727 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14728 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14729 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14730 // The to-be-promoted operands of this node have not yet been
14731 // promoted (this should be rare because we're going through the
14732 // list backward, but if one of the operands has several users in
14733 // this cluster of to-be-promoted nodes, it is possible).
14734 PromOpHandles.emplace_front(PromOp);
14735 continue;
14736 }
14737
14738 // For SELECT and SELECT_CC nodes, we do a similar check for any
14739 // to-be-promoted comparison inputs.
14740 if (PromOp.getOpcode() == ISD::SELECT ||
14741 PromOp.getOpcode() == ISD::SELECT_CC) {
14742 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14743 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14744 (SelectTruncOp[1].count(PromOp.getNode()) &&
14745 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14746 PromOpHandles.emplace_front(PromOp);
14747 continue;
14748 }
14749 }
14750
14752 PromOp.getNode()->op_end());
14753
14754 // If this node has constant inputs, then they'll need to be promoted here.
14755 for (unsigned i = 0; i < 2; ++i) {
14756 if (!isa<ConstantSDNode>(Ops[C+i]))
14757 continue;
14758 if (Ops[C+i].getValueType() == N->getValueType(0))
14759 continue;
14760
14761 if (N->getOpcode() == ISD::SIGN_EXTEND)
14762 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14763 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14764 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14765 else
14766 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14767 }
14768
14769 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14770 // truncate them again to the original value type.
14771 if (PromOp.getOpcode() == ISD::SELECT ||
14772 PromOp.getOpcode() == ISD::SELECT_CC) {
14773 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14774 if (SI0 != SelectTruncOp[0].end())
14775 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14776 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14777 if (SI1 != SelectTruncOp[1].end())
14778 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14779 }
14780
14781 DAG.ReplaceAllUsesOfValueWith(PromOp,
14782 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14783 }
14784
14785 // Now we're left with the initial extension itself.
14786 if (!ReallyNeedsExt)
14787 return N->getOperand(0);
14788
14789 // To zero extend, just mask off everything except for the first bit (in the
14790 // i1 case).
14791 if (N->getOpcode() == ISD::ZERO_EXTEND)
14792 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14794 N->getValueSizeInBits(0), PromBits),
14795 dl, N->getValueType(0)));
14796
14797 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14798 "Invalid extension type");
14799 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14800 SDValue ShiftCst =
14801 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14802 return DAG.getNode(
14803 ISD::SRA, dl, N->getValueType(0),
14804 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14805 ShiftCst);
14806}
14807
14808SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14809 DAGCombinerInfo &DCI) const {
14810 assert(N->getOpcode() == ISD::SETCC &&
14811 "Should be called with a SETCC node");
14812
14813 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14814 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14815 SDValue LHS = N->getOperand(0);
14816 SDValue RHS = N->getOperand(1);
14817
14818 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14819 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14820 LHS.hasOneUse())
14821 std::swap(LHS, RHS);
14822
14823 // x == 0-y --> x+y == 0
14824 // x != 0-y --> x+y != 0
14825 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14826 RHS.hasOneUse()) {
14827 SDLoc DL(N);
14828 SelectionDAG &DAG = DCI.DAG;
14829 EVT VT = N->getValueType(0);
14830 EVT OpVT = LHS.getValueType();
14831 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14832 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14833 }
14834 }
14835
14836 return DAGCombineTruncBoolExt(N, DCI);
14837}
14838
14839// Is this an extending load from an f32 to an f64?
14840static bool isFPExtLoad(SDValue Op) {
14841 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14842 return LD->getExtensionType() == ISD::EXTLOAD &&
14843 Op.getValueType() == MVT::f64;
14844 return false;
14845}
14846
14847/// Reduces the number of fp-to-int conversion when building a vector.
14848///
14849/// If this vector is built out of floating to integer conversions,
14850/// transform it to a vector built out of floating point values followed by a
14851/// single floating to integer conversion of the vector.
14852/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14853/// becomes (fptosi (build_vector ($A, $B, ...)))
14854SDValue PPCTargetLowering::
14855combineElementTruncationToVectorTruncation(SDNode *N,
14856 DAGCombinerInfo &DCI) const {
14857 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14858 "Should be called with a BUILD_VECTOR node");
14859
14860 SelectionDAG &DAG = DCI.DAG;
14861 SDLoc dl(N);
14862
14863 SDValue FirstInput = N->getOperand(0);
14864 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14865 "The input operand must be an fp-to-int conversion.");
14866
14867 // This combine happens after legalization so the fp_to_[su]i nodes are
14868 // already converted to PPCSISD nodes.
14869 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14870 if (FirstConversion == PPCISD::FCTIDZ ||
14871 FirstConversion == PPCISD::FCTIDUZ ||
14872 FirstConversion == PPCISD::FCTIWZ ||
14873 FirstConversion == PPCISD::FCTIWUZ) {
14874 bool IsSplat = true;
14875 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14876 FirstConversion == PPCISD::FCTIWUZ;
14877 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14879 EVT TargetVT = N->getValueType(0);
14880 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14881 SDValue NextOp = N->getOperand(i);
14882 if (NextOp.getOpcode() != PPCISD::MFVSR)
14883 return SDValue();
14884 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14885 if (NextConversion != FirstConversion)
14886 return SDValue();
14887 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14888 // This is not valid if the input was originally double precision. It is
14889 // also not profitable to do unless this is an extending load in which
14890 // case doing this combine will allow us to combine consecutive loads.
14891 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14892 return SDValue();
14893 if (N->getOperand(i) != FirstInput)
14894 IsSplat = false;
14895 }
14896
14897 // If this is a splat, we leave it as-is since there will be only a single
14898 // fp-to-int conversion followed by a splat of the integer. This is better
14899 // for 32-bit and smaller ints and neutral for 64-bit ints.
14900 if (IsSplat)
14901 return SDValue();
14902
14903 // Now that we know we have the right type of node, get its operands
14904 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14905 SDValue In = N->getOperand(i).getOperand(0);
14906 if (Is32Bit) {
14907 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14908 // here, we know that all inputs are extending loads so this is safe).
14909 if (In.isUndef())
14910 Ops.push_back(DAG.getUNDEF(SrcVT));
14911 else {
14912 SDValue Trunc =
14913 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14914 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14915 Ops.push_back(Trunc);
14916 }
14917 } else
14918 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14919 }
14920
14921 unsigned Opcode;
14922 if (FirstConversion == PPCISD::FCTIDZ ||
14923 FirstConversion == PPCISD::FCTIWZ)
14924 Opcode = ISD::FP_TO_SINT;
14925 else
14926 Opcode = ISD::FP_TO_UINT;
14927
14928 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14929 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14930 return DAG.getNode(Opcode, dl, TargetVT, BV);
14931 }
14932 return SDValue();
14933}
14934
14935/// Reduce the number of loads when building a vector.
14936///
14937/// Building a vector out of multiple loads can be converted to a load
14938/// of the vector type if the loads are consecutive. If the loads are
14939/// consecutive but in descending order, a shuffle is added at the end
14940/// to reorder the vector.
14942 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14943 "Should be called with a BUILD_VECTOR node");
14944
14945 SDLoc dl(N);
14946
14947 // Return early for non byte-sized type, as they can't be consecutive.
14948 if (!N->getValueType(0).getVectorElementType().isByteSized())
14949 return SDValue();
14950
14951 bool InputsAreConsecutiveLoads = true;
14952 bool InputsAreReverseConsecutive = true;
14953 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14954 SDValue FirstInput = N->getOperand(0);
14955 bool IsRoundOfExtLoad = false;
14956 LoadSDNode *FirstLoad = nullptr;
14957
14958 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14959 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14960 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14961 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14962 }
14963 // Not a build vector of (possibly fp_rounded) loads.
14964 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14965 N->getNumOperands() == 1)
14966 return SDValue();
14967
14968 if (!IsRoundOfExtLoad)
14969 FirstLoad = cast<LoadSDNode>(FirstInput);
14970
14972 InputLoads.push_back(FirstLoad);
14973 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14974 // If any inputs are fp_round(extload), they all must be.
14975 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14976 return SDValue();
14977
14978 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14979 N->getOperand(i);
14980 if (NextInput.getOpcode() != ISD::LOAD)
14981 return SDValue();
14982
14983 SDValue PreviousInput =
14984 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14985 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14986 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14987
14988 // If any inputs are fp_round(extload), they all must be.
14989 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14990 return SDValue();
14991
14992 // We only care about regular loads. The PPC-specific load intrinsics
14993 // will not lead to a merge opportunity.
14994 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14995 InputsAreConsecutiveLoads = false;
14996 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14997 InputsAreReverseConsecutive = false;
14998
14999 // Exit early if the loads are neither consecutive nor reverse consecutive.
15000 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15001 return SDValue();
15002 InputLoads.push_back(LD2);
15003 }
15004
15005 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15006 "The loads cannot be both consecutive and reverse consecutive.");
15007
15008 SDValue WideLoad;
15009 SDValue ReturnSDVal;
15010 if (InputsAreConsecutiveLoads) {
15011 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15012 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15013 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15014 FirstLoad->getAlign());
15015 ReturnSDVal = WideLoad;
15016 } else if (InputsAreReverseConsecutive) {
15017 LoadSDNode *LastLoad = InputLoads.back();
15018 assert(LastLoad && "Input needs to be a LoadSDNode.");
15019 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15020 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15021 LastLoad->getAlign());
15023 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15024 Ops.push_back(i);
15025
15026 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15027 DAG.getUNDEF(N->getValueType(0)), Ops);
15028 } else
15029 return SDValue();
15030
15031 for (auto *LD : InputLoads)
15032 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15033 return ReturnSDVal;
15034}
15035
15036// This function adds the required vector_shuffle needed to get
15037// the elements of the vector extract in the correct position
15038// as specified by the CorrectElems encoding.
15040 SDValue Input, uint64_t Elems,
15041 uint64_t CorrectElems) {
15042 SDLoc dl(N);
15043
15044 unsigned NumElems = Input.getValueType().getVectorNumElements();
15045 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15046
15047 // Knowing the element indices being extracted from the original
15048 // vector and the order in which they're being inserted, just put
15049 // them at element indices required for the instruction.
15050 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15051 if (DAG.getDataLayout().isLittleEndian())
15052 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15053 else
15054 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15055 CorrectElems = CorrectElems >> 8;
15056 Elems = Elems >> 8;
15057 }
15058
15059 SDValue Shuffle =
15060 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15061 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15062
15063 EVT VT = N->getValueType(0);
15064 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15065
15066 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15067 Input.getValueType().getVectorElementType(),
15069 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15070 DAG.getValueType(ExtVT));
15071}
15072
15073// Look for build vector patterns where input operands come from sign
15074// extended vector_extract elements of specific indices. If the correct indices
15075// aren't used, add a vector shuffle to fix up the indices and create
15076// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15077// during instruction selection.
15079 // This array encodes the indices that the vector sign extend instructions
15080 // extract from when extending from one type to another for both BE and LE.
15081 // The right nibble of each byte corresponds to the LE incides.
15082 // and the left nibble of each byte corresponds to the BE incides.
15083 // For example: 0x3074B8FC byte->word
15084 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15085 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15086 // For example: 0x000070F8 byte->double word
15087 // For LE: the allowed indices are: 0x0,0x8
15088 // For BE: the allowed indices are: 0x7,0xF
15089 uint64_t TargetElems[] = {
15090 0x3074B8FC, // b->w
15091 0x000070F8, // b->d
15092 0x10325476, // h->w
15093 0x00003074, // h->d
15094 0x00001032, // w->d
15095 };
15096
15097 uint64_t Elems = 0;
15098 int Index;
15099 SDValue Input;
15100
15101 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15102 if (!Op)
15103 return false;
15104 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15105 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15106 return false;
15107
15108 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15109 // of the right width.
15110 SDValue Extract = Op.getOperand(0);
15111 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15112 Extract = Extract.getOperand(0);
15113 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15114 return false;
15115
15117 if (!ExtOp)
15118 return false;
15119
15120 Index = ExtOp->getZExtValue();
15121 if (Input && Input != Extract.getOperand(0))
15122 return false;
15123
15124 if (!Input)
15125 Input = Extract.getOperand(0);
15126
15127 Elems = Elems << 8;
15128 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15129 Elems |= Index;
15130
15131 return true;
15132 };
15133
15134 // If the build vector operands aren't sign extended vector extracts,
15135 // of the same input vector, then return.
15136 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15137 if (!isSExtOfVecExtract(N->getOperand(i))) {
15138 return SDValue();
15139 }
15140 }
15141
15142 // If the vector extract indices are not correct, add the appropriate
15143 // vector_shuffle.
15144 int TgtElemArrayIdx;
15145 int InputSize = Input.getValueType().getScalarSizeInBits();
15146 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15147 if (InputSize + OutputSize == 40)
15148 TgtElemArrayIdx = 0;
15149 else if (InputSize + OutputSize == 72)
15150 TgtElemArrayIdx = 1;
15151 else if (InputSize + OutputSize == 48)
15152 TgtElemArrayIdx = 2;
15153 else if (InputSize + OutputSize == 80)
15154 TgtElemArrayIdx = 3;
15155 else if (InputSize + OutputSize == 96)
15156 TgtElemArrayIdx = 4;
15157 else
15158 return SDValue();
15159
15160 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15161 CorrectElems = DAG.getDataLayout().isLittleEndian()
15162 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15163 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15164 if (Elems != CorrectElems) {
15165 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15166 }
15167
15168 // Regular lowering will catch cases where a shuffle is not needed.
15169 return SDValue();
15170}
15171
15172// Look for the pattern of a load from a narrow width to i128, feeding
15173// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15174// (LXVRZX). This node represents a zero extending load that will be matched
15175// to the Load VSX Vector Rightmost instructions.
15177 SDLoc DL(N);
15178
15179 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15180 if (N->getValueType(0) != MVT::v1i128)
15181 return SDValue();
15182
15183 SDValue Operand = N->getOperand(0);
15184 // Proceed with the transformation if the operand to the BUILD_VECTOR
15185 // is a load instruction.
15186 if (Operand.getOpcode() != ISD::LOAD)
15187 return SDValue();
15188
15189 auto *LD = cast<LoadSDNode>(Operand);
15190 EVT MemoryType = LD->getMemoryVT();
15191
15192 // This transformation is only valid if the we are loading either a byte,
15193 // halfword, word, or doubleword.
15194 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15195 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15196
15197 // Ensure that the load from the narrow width is being zero extended to i128.
15198 if (!ValidLDType ||
15199 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15200 LD->getExtensionType() != ISD::EXTLOAD))
15201 return SDValue();
15202
15203 SDValue LoadOps[] = {
15204 LD->getChain(), LD->getBasePtr(),
15205 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15206
15208 DAG.getVTList(MVT::v1i128, MVT::Other),
15209 LoadOps, MemoryType, LD->getMemOperand());
15210}
15211
15212SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15213 DAGCombinerInfo &DCI) const {
15214 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15215 "Should be called with a BUILD_VECTOR node");
15216
15217 SelectionDAG &DAG = DCI.DAG;
15218 SDLoc dl(N);
15219
15220 if (!Subtarget.hasVSX())
15221 return SDValue();
15222
15223 // The target independent DAG combiner will leave a build_vector of
15224 // float-to-int conversions intact. We can generate MUCH better code for
15225 // a float-to-int conversion of a vector of floats.
15226 SDValue FirstInput = N->getOperand(0);
15227 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15228 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15229 if (Reduced)
15230 return Reduced;
15231 }
15232
15233 // If we're building a vector out of consecutive loads, just load that
15234 // vector type.
15235 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15236 if (Reduced)
15237 return Reduced;
15238
15239 // If we're building a vector out of extended elements from another vector
15240 // we have P9 vector integer extend instructions. The code assumes legal
15241 // input types (i.e. it can't handle things like v4i16) so do not run before
15242 // legalization.
15243 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15244 Reduced = combineBVOfVecSExt(N, DAG);
15245 if (Reduced)
15246 return Reduced;
15247 }
15248
15249 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15250 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15251 // is a load from <valid narrow width> to i128.
15252 if (Subtarget.isISA3_1()) {
15253 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15254 if (BVOfZLoad)
15255 return BVOfZLoad;
15256 }
15257
15258 if (N->getValueType(0) != MVT::v2f64)
15259 return SDValue();
15260
15261 // Looking for:
15262 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15263 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15264 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15265 return SDValue();
15266 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15267 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15268 return SDValue();
15269 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15270 return SDValue();
15271
15272 SDValue Ext1 = FirstInput.getOperand(0);
15273 SDValue Ext2 = N->getOperand(1).getOperand(0);
15274 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15276 return SDValue();
15277
15280 if (!Ext1Op || !Ext2Op)
15281 return SDValue();
15282 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15283 Ext1.getOperand(0) != Ext2.getOperand(0))
15284 return SDValue();
15285
15286 int FirstElem = Ext1Op->getZExtValue();
15287 int SecondElem = Ext2Op->getZExtValue();
15288 int SubvecIdx;
15289 if (FirstElem == 0 && SecondElem == 1)
15290 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15291 else if (FirstElem == 2 && SecondElem == 3)
15292 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15293 else
15294 return SDValue();
15295
15296 SDValue SrcVec = Ext1.getOperand(0);
15297 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15299 return DAG.getNode(NodeType, dl, MVT::v2f64,
15300 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15301}
15302
15303SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15304 DAGCombinerInfo &DCI) const {
15305 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15306 N->getOpcode() == ISD::UINT_TO_FP) &&
15307 "Need an int -> FP conversion node here");
15308
15309 if (useSoftFloat() || !Subtarget.has64BitSupport())
15310 return SDValue();
15311
15312 SelectionDAG &DAG = DCI.DAG;
15313 SDLoc dl(N);
15314 SDValue Op(N, 0);
15315
15316 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15317 // from the hardware.
15318 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15319 return SDValue();
15320 if (!Op.getOperand(0).getValueType().isSimple())
15321 return SDValue();
15322 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15323 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15324 return SDValue();
15325
15326 SDValue FirstOperand(Op.getOperand(0));
15327 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15328 (FirstOperand.getValueType() == MVT::i8 ||
15329 FirstOperand.getValueType() == MVT::i16);
15330 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15331 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15332 bool DstDouble = Op.getValueType() == MVT::f64;
15333 unsigned ConvOp = Signed ?
15334 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15335 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15336 SDValue WidthConst =
15337 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15338 dl, false);
15339 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15340 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15342 DAG.getVTList(MVT::f64, MVT::Other),
15343 Ops, MVT::i8, LDN->getMemOperand());
15344 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15345
15346 // For signed conversion, we need to sign-extend the value in the VSR
15347 if (Signed) {
15348 SDValue ExtOps[] = { Ld, WidthConst };
15349 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15350 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15351 } else
15352 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15353 }
15354
15355
15356 // For i32 intermediate values, unfortunately, the conversion functions
15357 // leave the upper 32 bits of the value are undefined. Within the set of
15358 // scalar instructions, we have no method for zero- or sign-extending the
15359 // value. Thus, we cannot handle i32 intermediate values here.
15360 if (Op.getOperand(0).getValueType() == MVT::i32)
15361 return SDValue();
15362
15363 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15364 "UINT_TO_FP is supported only with FPCVT");
15365
15366 // If we have FCFIDS, then use it when converting to single-precision.
15367 // Otherwise, convert to double-precision and then round.
15368 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15369 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15371 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15372 : PPCISD::FCFID);
15373 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15374 ? MVT::f32
15375 : MVT::f64;
15376
15377 // If we're converting from a float, to an int, and back to a float again,
15378 // then we don't need the store/load pair at all.
15379 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15380 Subtarget.hasFPCVT()) ||
15381 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15382 SDValue Src = Op.getOperand(0).getOperand(0);
15383 if (Src.getValueType() == MVT::f32) {
15384 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15385 DCI.AddToWorklist(Src.getNode());
15386 } else if (Src.getValueType() != MVT::f64) {
15387 // Make sure that we don't pick up a ppc_fp128 source value.
15388 return SDValue();
15389 }
15390
15391 unsigned FCTOp =
15392 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15394
15395 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15396 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15397
15398 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15399 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15400 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15401 DCI.AddToWorklist(FP.getNode());
15402 }
15403
15404 return FP;
15405 }
15406
15407 return SDValue();
15408}
15409
15410// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15411// builtins) into loads with swaps.
15413 DAGCombinerInfo &DCI) const {
15414 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15415 // load combines.
15416 if (DCI.isBeforeLegalizeOps())
15417 return SDValue();
15418
15419 SelectionDAG &DAG = DCI.DAG;
15420 SDLoc dl(N);
15421 SDValue Chain;
15422 SDValue Base;
15423 MachineMemOperand *MMO;
15424
15425 switch (N->getOpcode()) {
15426 default:
15427 llvm_unreachable("Unexpected opcode for little endian VSX load");
15428 case ISD::LOAD: {
15430 Chain = LD->getChain();
15431 Base = LD->getBasePtr();
15432 MMO = LD->getMemOperand();
15433 // If the MMO suggests this isn't a load of a full vector, leave
15434 // things alone. For a built-in, we have to make the change for
15435 // correctness, so if there is a size problem that will be a bug.
15436 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15437 return SDValue();
15438 break;
15439 }
15442 Chain = Intrin->getChain();
15443 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15444 // us what we want. Get operand 2 instead.
15445 Base = Intrin->getOperand(2);
15446 MMO = Intrin->getMemOperand();
15447 break;
15448 }
15449 }
15450
15451 MVT VecTy = N->getValueType(0).getSimpleVT();
15452
15453 SDValue LoadOps[] = { Chain, Base };
15455 DAG.getVTList(MVT::v2f64, MVT::Other),
15456 LoadOps, MVT::v2f64, MMO);
15457
15458 DCI.AddToWorklist(Load.getNode());
15459 Chain = Load.getValue(1);
15460 SDValue Swap = DAG.getNode(
15461 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15462 DCI.AddToWorklist(Swap.getNode());
15463
15464 // Add a bitcast if the resulting load type doesn't match v2f64.
15465 if (VecTy != MVT::v2f64) {
15466 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15467 DCI.AddToWorklist(N.getNode());
15468 // Package {bitcast value, swap's chain} to match Load's shape.
15469 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15470 N, Swap.getValue(1));
15471 }
15472
15473 return Swap;
15474}
15475
15476// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15477// builtins) into stores with swaps.
15479 DAGCombinerInfo &DCI) const {
15480 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15481 // store combines.
15482 if (DCI.isBeforeLegalizeOps())
15483 return SDValue();
15484
15485 SelectionDAG &DAG = DCI.DAG;
15486 SDLoc dl(N);
15487 SDValue Chain;
15488 SDValue Base;
15489 unsigned SrcOpnd;
15490 MachineMemOperand *MMO;
15491
15492 switch (N->getOpcode()) {
15493 default:
15494 llvm_unreachable("Unexpected opcode for little endian VSX store");
15495 case ISD::STORE: {
15497 Chain = ST->getChain();
15498 Base = ST->getBasePtr();
15499 MMO = ST->getMemOperand();
15500 SrcOpnd = 1;
15501 // If the MMO suggests this isn't a store of a full vector, leave
15502 // things alone. For a built-in, we have to make the change for
15503 // correctness, so if there is a size problem that will be a bug.
15504 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15505 return SDValue();
15506 break;
15507 }
15508 case ISD::INTRINSIC_VOID: {
15510 Chain = Intrin->getChain();
15511 // Intrin->getBasePtr() oddly does not get what we want.
15512 Base = Intrin->getOperand(3);
15513 MMO = Intrin->getMemOperand();
15514 SrcOpnd = 2;
15515 break;
15516 }
15517 }
15518
15519 SDValue Src = N->getOperand(SrcOpnd);
15520 MVT VecTy = Src.getValueType().getSimpleVT();
15521
15522 // All stores are done as v2f64 and possible bit cast.
15523 if (VecTy != MVT::v2f64) {
15524 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15525 DCI.AddToWorklist(Src.getNode());
15526 }
15527
15528 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15529 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15530 DCI.AddToWorklist(Swap.getNode());
15531 Chain = Swap.getValue(1);
15532 SDValue StoreOps[] = { Chain, Swap, Base };
15534 DAG.getVTList(MVT::Other),
15535 StoreOps, VecTy, MMO);
15536 DCI.AddToWorklist(Store.getNode());
15537 return Store;
15538}
15539
// Handle DAG combine for STORE (FP_TO_INT F).
// Fold a store of a (possibly strict) FP-to-integer conversion into a single
// convert-and-store node so the value does not round-trip through a GPR.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();
  // Opcode is only consumed by the assert below; the cast to void silences
  // unused-variable warnings in NDEBUG builds.
  (void)Opcode;
  bool Strict = N->getOperand(1)->isStrictFPOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // For the strict variants operand 0 of the conversion is the chain, so the
  // floating-point source sits at operand 1 instead of operand 0.
  SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
      (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
       (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // TODO: Lower conversion from f128 on all VSX targets
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
    return SDValue();

  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
                   DAG.getIntPtrConstant(ByteSize, dl, false),
                   DAG.getValueType(Op1VT)};

                          DAG.getVTList(MVT::Other), Ops,
                          cast<StoreSDNode>(N)->getMemoryVT(),
                          cast<StoreSDNode>(N)->getMemOperand());

  return Val;
}
15588
15589static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15590 // Check that the source of the element keeps flipping
15591 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15592 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15593 for (int i = 1, e = Mask.size(); i < e; i++) {
15594 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15595 return false;
15596 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15597 return false;
15598 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15599 }
15600 return true;
15601}
15602
15603static bool isSplatBV(SDValue Op) {
15604 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15605 return false;
15606 SDValue FirstOp;
15607
15608 // Find first non-undef input.
15609 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15610 FirstOp = Op.getOperand(i);
15611 if (!FirstOp.isUndef())
15612 break;
15613 }
15614
15615 // All inputs are undef or the same as the first non-undef input.
15616 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15617 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15618 return false;
15619 return true;
15620}
15621
15623 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15624 return Op;
15625 if (Op.getOpcode() != ISD::BITCAST)
15626 return SDValue();
15627 Op = Op.getOperand(0);
15628 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15629 return Op;
15630 return SDValue();
15631}
15632
15633// Fix up the shuffle mask to account for the fact that the result of
15634// scalar_to_vector is not in lane zero. This just takes all values in
15635// the ranges specified by the min/max indices and adds the number of
15636// elements required to ensure each element comes from the respective
15637// position in the valid lane.
15638// On little endian, that's just the corresponding element in the other
15639// half of the vector. On big endian, it is in the same half but right
15640// justified rather than left justified in that half.
15642 int LHSMaxIdx, int RHSMinIdx,
15643 int RHSMaxIdx, int HalfVec,
15644 unsigned ValidLaneWidth,
15645 const PPCSubtarget &Subtarget) {
15646 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15647 int Idx = ShuffV[i];
15648 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15649 ShuffV[i] +=
15650 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15651 }
15652}
15653
15654// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15655// the original is:
15656// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15657// In such a case, just change the shuffle mask to extract the element
15658// from the permuted index.
15660 const PPCSubtarget &Subtarget) {
15661 SDLoc dl(OrigSToV);
15662 EVT VT = OrigSToV.getValueType();
15663 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15664 "Expecting a SCALAR_TO_VECTOR here");
15665 SDValue Input = OrigSToV.getOperand(0);
15666
15667 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15668 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15669 SDValue OrigVector = Input.getOperand(0);
15670
15671 // Can't handle non-const element indices or different vector types
15672 // for the input to the extract and the output of the scalar_to_vector.
15673 if (Idx && VT == OrigVector.getValueType()) {
15674 unsigned NumElts = VT.getVectorNumElements();
15675 assert(
15676 NumElts > 1 &&
15677 "Cannot produce a permuted scalar_to_vector for one element vector");
15678 SmallVector<int, 16> NewMask(NumElts, -1);
15679 unsigned ResultInElt = NumElts / 2;
15680 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15681 NewMask[ResultInElt] = Idx->getZExtValue();
15682 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15683 }
15684 }
15685 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15686 OrigSToV.getOperand(0));
15687}
15688
15689// On little endian subtargets, combine shuffles such as:
15690// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15691// into:
15692// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15693// because the latter can be matched to a single instruction merge.
15694// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15695// to put the value into element zero. Adjust the shuffle mask so that the
15696// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15697// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15698// nodes with elements smaller than doubleword because all the ways
15699// of getting scalar data into a vector register put the value in the
15700// rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Default result: the original shuffle node, unmodified.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask);
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
    // same type and have differing element sizes, then do not perform
    // the following transformation. The current transformation for
    // SCALAR_TO_VECTOR assumes that both input vectors have the same
    // element size. This will be updated in the future to account for
    // differing sizes of the LHS and RHS.
    if (SToVLHS && SToVRHS &&
        (SToVLHS.getValueType().getScalarSizeInBits() !=
         SToVRHS.getValueType().getScalarSizeInBits()))
      return Res;

    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast.
    unsigned ValidLaneWidth =
        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
                      LHS.getValueType().getScalarSizeInBits()
                : SToVRHS.getValueType().getScalarSizeInBits() /
                      RHS.getValueType().getScalarSizeInBits();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is. A max index of -1 leaves the
    // corresponding mask entries untouched by the fixup below.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    if (SToVLHS) {
      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      // Set up the values for the shuffle vector fixup.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
      if (SToVLHS.getValueType() != LHS.getValueType())
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
      LHS = SToVLHS;
    }
    if (SToVRHS) {
      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
      if (SToVRHS.getValueType() != RHS.getValueType())
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
      RHS = SToVRHS;
    }

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec, ValidLaneWidth, Subtarget);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // On LE the splat (if any) ends up on the RHS after the canonicalization
  // above; on BE it is the LHS.
  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
      }
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
      }
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
}
15878
// Combine an element-reversing shuffle fed by a normal load, or a normal
// store fed by an element-reversing shuffle, into a single
// PPCISD::LOAD_VEC_BE / STORE_VEC_BE node (little-endian, P9 and later only),
// eliminating the explicit reversal.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  // Returns true iff the mask is <N-1, N-2, ..., 1, 0>, i.e. a full reversal
  // of the vector elements.
  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if(!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    // If the load return value 0 has more than one user except the
    // shufflevector instruction, it is not profitable to replace the
    // shufflevector with a reverse load.
    for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
         UI != UE; ++UI)
      if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();

    SDLoc dl(LSBase);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    SDLoc dl(LSBase);
    // The shuffle's (un-reversed) input becomes the stored value.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
15947
15948static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15949 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15950 if (IntrinsicID == Intrinsic::ppc_stdcx)
15951 StoreWidth = 8;
15952 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15953 StoreWidth = 4;
15954 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15955 StoreWidth = 2;
15956 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15957 StoreWidth = 1;
15958 else
15959 return false;
15960 return true;
15961}
15962
15964 DAGCombinerInfo &DCI) const {
15965 SelectionDAG &DAG = DCI.DAG;
15966 SDLoc dl(N);
15967 switch (N->getOpcode()) {
15968 default: break;
15969 case ISD::ADD:
15970 return combineADD(N, DCI);
15971 case ISD::AND: {
15972 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15973 // original input as that will prevent us from selecting optimal rotates.
15974 // This only matters if the input to the extend is i32 widened to i64.
15975 SDValue Op1 = N->getOperand(0);
15976 SDValue Op2 = N->getOperand(1);
15977 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15978 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15979 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15980 Op1.getOperand(0).getValueType() != MVT::i32)
15981 break;
15982 SDValue NarrowOp = Op1.getOperand(0);
15983 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15984 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15985 break;
15986
15987 uint64_t Imm = Op2->getAsZExtVal();
15988 // Make sure that the constant is narrow enough to fit in the narrow type.
15989 if (!isUInt<32>(Imm))
15990 break;
15991 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15992 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15993 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15994 }
15995 case ISD::SHL:
15996 return combineSHL(N, DCI);
15997 case ISD::SRA:
15998 return combineSRA(N, DCI);
15999 case ISD::SRL:
16000 return combineSRL(N, DCI);
16001 case ISD::MUL:
16002 return combineMUL(N, DCI);
16003 case ISD::FMA:
16004 case PPCISD::FNMSUB:
16005 return combineFMALike(N, DCI);
16006 case PPCISD::SHL:
16007 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16008 return N->getOperand(0);
16009 break;
16010 case PPCISD::SRL:
16011 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16012 return N->getOperand(0);
16013 break;
16014 case PPCISD::SRA:
16015 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16016 if (C->isZero() || // 0 >>s V -> 0.
16017 C->isAllOnes()) // -1 >>s V -> -1.
16018 return N->getOperand(0);
16019 }
16020 break;
16021 case ISD::SIGN_EXTEND:
16022 case ISD::ZERO_EXTEND:
16023 case ISD::ANY_EXTEND:
16024 return DAGCombineExtBoolTrunc(N, DCI);
16025 case ISD::TRUNCATE:
16026 return combineTRUNCATE(N, DCI);
16027 case ISD::SETCC:
16028 if (SDValue CSCC = combineSetCC(N, DCI))
16029 return CSCC;
16030 [[fallthrough]];
16031 case ISD::SELECT_CC:
16032 return DAGCombineTruncBoolExt(N, DCI);
16033 case ISD::SINT_TO_FP:
16034 case ISD::UINT_TO_FP:
16035 return combineFPToIntToFP(N, DCI);
16037 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16038 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16039 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16040 }
16041 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16042 case ISD::STORE: {
16043
16044 EVT Op1VT = N->getOperand(1).getValueType();
16045 unsigned Opcode = N->getOperand(1).getOpcode();
16046
16047 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16048 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16049 SDValue Val = combineStoreFPToInt(N, DCI);
16050 if (Val)
16051 return Val;
16052 }
16053
16054 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16055 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16056 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16057 if (Val)
16058 return Val;
16059 }
16060
16061 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
16062 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16063 N->getOperand(1).getNode()->hasOneUse() &&
16064 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16065 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16066
16067 // STBRX can only handle simple types and it makes no sense to store less
16068 // two bytes in byte-reversed order.
16069 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16070 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16071 break;
16072
16073 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16074 // Do an any-extend to 32-bits if this is a half-word input.
16075 if (BSwapOp.getValueType() == MVT::i16)
16076 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16077
16078 // If the type of BSWAP operand is wider than stored memory width
16079 // it need to be shifted to the right side before STBRX.
16080 if (Op1VT.bitsGT(mVT)) {
16081 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16082 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16083 DAG.getConstant(Shift, dl, MVT::i32));
16084 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16085 if (Op1VT == MVT::i64)
16086 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16087 }
16088
16089 SDValue Ops[] = {
16090 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16091 };
16092 return
16093 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16094 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16095 cast<StoreSDNode>(N)->getMemOperand());
16096 }
16097
16098 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16099 // So it can increase the chance of CSE constant construction.
16100 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16101 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16102 // Need to sign-extended to 64-bits to handle negative values.
16103 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16104 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16105 MemVT.getSizeInBits());
16106 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16107
16108 // DAG.getTruncStore() can't be used here because it doesn't accept
16109 // the general (base + offset) addressing mode.
16110 // So we use UpdateNodeOperands and setTruncatingStore instead.
16111 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
16112 N->getOperand(3));
16113 cast<StoreSDNode>(N)->setTruncatingStore(true);
16114 return SDValue(N, 0);
16115 }
16116
16117 // For little endian, VSX stores require generating xxswapd/lxvd2x.
16118 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16119 if (Op1VT.isSimple()) {
16120 MVT StoreVT = Op1VT.getSimpleVT();
16121 if (Subtarget.needsSwapsForVSXMemOps() &&
16122 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16123 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16124 return expandVSXStoreForLE(N, DCI);
16125 }
16126 break;
16127 }
16128 case ISD::LOAD: {
16130 EVT VT = LD->getValueType(0);
16131
16132 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16133 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16134 if (VT.isSimple()) {
16135 MVT LoadVT = VT.getSimpleVT();
16136 if (Subtarget.needsSwapsForVSXMemOps() &&
16137 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16138 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16139 return expandVSXLoadForLE(N, DCI);
16140 }
16141
16142 // We sometimes end up with a 64-bit integer load, from which we extract
16143 // two single-precision floating-point numbers. This happens with
16144 // std::complex<float>, and other similar structures, because of the way we
16145 // canonicalize structure copies. However, if we lack direct moves,
16146 // then the final bitcasts from the extracted integer values to the
16147 // floating-point numbers turn into store/load pairs. Even with direct moves,
16148 // just loading the two floating-point numbers is likely better.
16149 auto ReplaceTwoFloatLoad = [&]() {
16150 if (VT != MVT::i64)
16151 return false;
16152
16153 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16154 LD->isVolatile())
16155 return false;
16156
16157 // We're looking for a sequence like this:
16158 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16159 // t16: i64 = srl t13, Constant:i32<32>
16160 // t17: i32 = truncate t16
16161 // t18: f32 = bitcast t17
16162 // t19: i32 = truncate t13
16163 // t20: f32 = bitcast t19
16164
16165 if (!LD->hasNUsesOfValue(2, 0))
16166 return false;
16167
16168 auto UI = LD->use_begin();
16169 while (UI.getUse().getResNo() != 0) ++UI;
16170 SDNode *Trunc = *UI++;
16171 while (UI.getUse().getResNo() != 0) ++UI;
16172 SDNode *RightShift = *UI;
16173 if (Trunc->getOpcode() != ISD::TRUNCATE)
16174 std::swap(Trunc, RightShift);
16175
16176 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16177 Trunc->getValueType(0) != MVT::i32 ||
16178 !Trunc->hasOneUse())
16179 return false;
16180 if (RightShift->getOpcode() != ISD::SRL ||
16181 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16182 RightShift->getConstantOperandVal(1) != 32 ||
16183 !RightShift->hasOneUse())
16184 return false;
16185
16186 SDNode *Trunc2 = *RightShift->use_begin();
16187 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16188 Trunc2->getValueType(0) != MVT::i32 ||
16189 !Trunc2->hasOneUse())
16190 return false;
16191
16192 SDNode *Bitcast = *Trunc->use_begin();
16193 SDNode *Bitcast2 = *Trunc2->use_begin();
16194
16195 if (Bitcast->getOpcode() != ISD::BITCAST ||
16196 Bitcast->getValueType(0) != MVT::f32)
16197 return false;
16198 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16199 Bitcast2->getValueType(0) != MVT::f32)
16200 return false;
16201
16202 if (Subtarget.isLittleEndian())
16203 std::swap(Bitcast, Bitcast2);
16204
16205 // Bitcast has the second float (in memory-layout order) and Bitcast2
16206 // has the first one.
16207
16208 SDValue BasePtr = LD->getBasePtr();
16209 if (LD->isIndexed()) {
16210 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16211 "Non-pre-inc AM on PPC?");
16212 BasePtr =
16213 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16214 LD->getOffset());
16215 }
16216
16217 auto MMOFlags =
16218 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16219 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16220 LD->getPointerInfo(), LD->getAlign(),
16221 MMOFlags, LD->getAAInfo());
16222 SDValue AddPtr =
16223 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16224 BasePtr, DAG.getIntPtrConstant(4, dl));
16225 SDValue FloatLoad2 = DAG.getLoad(
16226 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16227 LD->getPointerInfo().getWithOffset(4),
16228 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16229
16230 if (LD->isIndexed()) {
16231 // Note that DAGCombine should re-form any pre-increment load(s) from
16232 // what is produced here if that makes sense.
16233 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16234 }
16235
16236 DCI.CombineTo(Bitcast2, FloatLoad);
16237 DCI.CombineTo(Bitcast, FloatLoad2);
16238
16239 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16240 SDValue(FloatLoad2.getNode(), 1));
16241 return true;
16242 };
16243
16244 if (ReplaceTwoFloatLoad())
16245 return SDValue(N, 0);
16246
16247 EVT MemVT = LD->getMemoryVT();
16248 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16249 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16250 if (LD->isUnindexed() && VT.isVector() &&
16251 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16252 // P8 and later hardware should just use LOAD.
16253 !Subtarget.hasP8Vector() &&
16254 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16255 VT == MVT::v4f32))) &&
16256 LD->getAlign() < ABIAlignment) {
16257 // This is a type-legal unaligned Altivec load.
16258 SDValue Chain = LD->getChain();
16259 SDValue Ptr = LD->getBasePtr();
16260 bool isLittleEndian = Subtarget.isLittleEndian();
16261
16262 // This implements the loading of unaligned vectors as described in
16263 // the venerable Apple Velocity Engine overview. Specifically:
16264 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16265 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16266 //
16267 // The general idea is to expand a sequence of one or more unaligned
16268 // loads into an alignment-based permutation-control instruction (lvsl
16269 // or lvsr), a series of regular vector loads (which always truncate
16270 // their input address to an aligned address), and a series of
16271 // permutations. The results of these permutations are the requested
16272 // loaded values. The trick is that the last "extra" load is not taken
16273 // from the address you might suspect (sizeof(vector) bytes after the
16274 // last requested load), but rather sizeof(vector) - 1 bytes after the
16275 // last requested vector. The point of this is to avoid a page fault if
16276 // the base address happened to be aligned. This works because if the
16277 // base address is aligned, then adding less than a full vector length
16278 // will cause the last vector in the sequence to be (re)loaded.
16279 // Otherwise, the next vector will be fetched as you might suspect was
16280 // necessary.
16281
16282 // We might be able to reuse the permutation generation from
16283 // a different base address offset from this one by an aligned amount.
16284 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16285 // optimization later.
16286 Intrinsic::ID Intr, IntrLD, IntrPerm;
16287 MVT PermCntlTy, PermTy, LDTy;
16288 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16289 : Intrinsic::ppc_altivec_lvsl;
16290 IntrLD = Intrinsic::ppc_altivec_lvx;
16291 IntrPerm = Intrinsic::ppc_altivec_vperm;
16292 PermCntlTy = MVT::v16i8;
16293 PermTy = MVT::v4i32;
16294 LDTy = MVT::v4i32;
16295
16296 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16297
16298 // Create the new MMO for the new base load. It is like the original MMO,
16299 // but represents an area in memory almost twice the vector size centered
16300 // on the original address. If the address is unaligned, we might start
16301 // reading up to (sizeof(vector)-1) bytes below the address of the
16302 // original unaligned load.
16304 MachineMemOperand *BaseMMO =
16305 MF.getMachineMemOperand(LD->getMemOperand(),
16306 -(int64_t)MemVT.getStoreSize()+1,
16307 2*MemVT.getStoreSize()-1);
16308
16309 // Create the new base load.
16310 SDValue LDXIntID =
16311 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16312 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16313 SDValue BaseLoad =
16315 DAG.getVTList(PermTy, MVT::Other),
16316 BaseLoadOps, LDTy, BaseMMO);
16317
16318 // Note that the value of IncOffset (which is provided to the next
16319 // load's pointer info offset value, and thus used to calculate the
16320 // alignment), and the value of IncValue (which is actually used to
16321 // increment the pointer value) are different! This is because we
16322 // require the next load to appear to be aligned, even though it
16323 // is actually offset from the base pointer by a lesser amount.
16324 int IncOffset = VT.getSizeInBits() / 8;
16325 int IncValue = IncOffset;
16326
16327 // Walk (both up and down) the chain looking for another load at the real
16328 // (aligned) offset (the alignment of the other load does not matter in
16329 // this case). If found, then do not use the offset reduction trick, as
16330 // that will prevent the loads from being later combined (as they would
16331 // otherwise be duplicates).
16332 if (!findConsecutiveLoad(LD, DAG))
16333 --IncValue;
16334
16335 SDValue Increment =
16336 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16337 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16338
16339 MachineMemOperand *ExtraMMO =
16340 MF.getMachineMemOperand(LD->getMemOperand(),
16341 1, 2*MemVT.getStoreSize()-1);
16342 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16343 SDValue ExtraLoad =
16345 DAG.getVTList(PermTy, MVT::Other),
16346 ExtraLoadOps, LDTy, ExtraMMO);
16347
16348 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16349 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16350
16351 // Because vperm has a big-endian bias, we must reverse the order
16352 // of the input vectors and complement the permute control vector
16353 // when generating little endian code. We have already handled the
16354 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16355 // and ExtraLoad here.
16356 SDValue Perm;
16357 if (isLittleEndian)
16358 Perm = BuildIntrinsicOp(IntrPerm,
16359 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16360 else
16361 Perm = BuildIntrinsicOp(IntrPerm,
16362 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16363
16364 if (VT != PermTy)
16365 Perm = Subtarget.hasAltivec()
16366 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16367 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16368 DAG.getTargetConstant(1, dl, MVT::i64));
16369 // second argument is 1 because this rounding
16370 // is always exact.
16371
16372 // The output of the permutation is our loaded result, the TokenFactor is
16373 // our new chain.
16374 DCI.CombineTo(N, Perm, TF);
16375 return SDValue(N, 0);
16376 }
16377 }
16378 break;
16380 bool isLittleEndian = Subtarget.isLittleEndian();
16381 unsigned IID = N->getConstantOperandVal(0);
16382 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16383 : Intrinsic::ppc_altivec_lvsl);
16384 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16385 SDValue Add = N->getOperand(1);
16386
16387 int Bits = 4 /* 16 byte alignment */;
16388
16389 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16390 APInt::getAllOnes(Bits /* alignment */)
16391 .zext(Add.getScalarValueSizeInBits()))) {
16392 SDNode *BasePtr = Add->getOperand(0).getNode();
16393 for (SDNode *U : BasePtr->uses()) {
16394 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16395 U->getConstantOperandVal(0) == IID) {
16396 // We've found another LVSL/LVSR, and this address is an aligned
16397 // multiple of that one. The results will be the same, so use the
16398 // one we've just found instead.
16399
16400 return SDValue(U, 0);
16401 }
16402 }
16403 }
16404
16405 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16406 SDNode *BasePtr = Add->getOperand(0).getNode();
16407 for (SDNode *U : BasePtr->uses()) {
16408 if (U->getOpcode() == ISD::ADD &&
16409 isa<ConstantSDNode>(U->getOperand(1)) &&
16410 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16411 (1ULL << Bits) ==
16412 0) {
16413 SDNode *OtherAdd = U;
16414 for (SDNode *V : OtherAdd->uses()) {
16415 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16416 V->getConstantOperandVal(0) == IID) {
16417 return SDValue(V, 0);
16418 }
16419 }
16420 }
16421 }
16422 }
16423 }
16424
16425 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16426 // Expose the vabsduw/h/b opportunity for down stream
16427 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16428 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16429 IID == Intrinsic::ppc_altivec_vmaxsh ||
16430 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16431 SDValue V1 = N->getOperand(1);
16432 SDValue V2 = N->getOperand(2);
16433 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16434 V1.getSimpleValueType() == MVT::v8i16 ||
16435 V1.getSimpleValueType() == MVT::v16i8) &&
16436 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16437 // (0-a, a)
16438 if (V1.getOpcode() == ISD::SUB &&
16440 V1.getOperand(1) == V2) {
16441 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16442 }
16443 // (a, 0-a)
16444 if (V2.getOpcode() == ISD::SUB &&
16445 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16446 V2.getOperand(1) == V1) {
16447 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16448 }
16449 // (x-y, y-x)
16450 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16451 V1.getOperand(0) == V2.getOperand(1) &&
16452 V1.getOperand(1) == V2.getOperand(0)) {
16453 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16454 }
16455 }
16456 }
16457 }
16458
16459 break;
16461 switch (N->getConstantOperandVal(1)) {
16462 default:
16463 break;
16464 case Intrinsic::ppc_altivec_vsum4sbs:
16465 case Intrinsic::ppc_altivec_vsum4shs:
16466 case Intrinsic::ppc_altivec_vsum4ubs: {
16467 // These sum-across intrinsics only have a chain due to the side effect
16468 // that they may set the SAT bit. If we know the SAT bit will not be set
16469 // for some inputs, we can replace any uses of their chain with the
16470 // input chain.
16471 if (BuildVectorSDNode *BVN =
16472 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16473 APInt APSplatBits, APSplatUndef;
16474 unsigned SplatBitSize;
16475 bool HasAnyUndefs;
16476 bool BVNIsConstantSplat = BVN->isConstantSplat(
16477 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16478 !Subtarget.isLittleEndian());
16479 // If the constant splat vector is 0, the SAT bit will not be set.
16480 if (BVNIsConstantSplat && APSplatBits == 0)
16481 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16482 }
16483 return SDValue();
16484 }
16485 case Intrinsic::ppc_vsx_lxvw4x:
16486 case Intrinsic::ppc_vsx_lxvd2x:
16487 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16488 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16489 if (Subtarget.needsSwapsForVSXMemOps())
16490 return expandVSXLoadForLE(N, DCI);
16491 break;
16492 }
16493 break;
16495 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16496 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16497 if (Subtarget.needsSwapsForVSXMemOps()) {
16498 switch (N->getConstantOperandVal(1)) {
16499 default:
16500 break;
16501 case Intrinsic::ppc_vsx_stxvw4x:
16502 case Intrinsic::ppc_vsx_stxvd2x:
16503 return expandVSXStoreForLE(N, DCI);
16504 }
16505 }
16506 break;
16507 case ISD::BSWAP: {
16508 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16509 // For subtargets without LDBRX, we can still do better than the default
16510 // expansion even for 64-bit BSWAP (LOAD).
16511 bool Is64BitBswapOn64BitTgt =
16512 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16513 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16514 N->getOperand(0).hasOneUse();
16515 if (IsSingleUseNormalLd &&
16516 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16517 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16518 SDValue Load = N->getOperand(0);
16519 LoadSDNode *LD = cast<LoadSDNode>(Load);
16520 // Create the byte-swapping load.
16521 SDValue Ops[] = {
16522 LD->getChain(), // Chain
16523 LD->getBasePtr(), // Ptr
16524 DAG.getValueType(N->getValueType(0)) // VT
16525 };
16526 SDValue BSLoad =
16528 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16529 MVT::i64 : MVT::i32, MVT::Other),
16530 Ops, LD->getMemoryVT(), LD->getMemOperand());
16531
16532 // If this is an i16 load, insert the truncate.
16533 SDValue ResVal = BSLoad;
16534 if (N->getValueType(0) == MVT::i16)
16535 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16536
16537 // First, combine the bswap away. This makes the value produced by the
16538 // load dead.
16539 DCI.CombineTo(N, ResVal);
16540
16541 // Next, combine the load away, we give it a bogus result value but a real
16542 // chain result. The result value is dead because the bswap is dead.
16543 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16544
16545 // Return N so it doesn't get rechecked!
16546 return SDValue(N, 0);
16547 }
16548 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16549 // before legalization so that the BUILD_PAIR is handled correctly.
16550 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16551 !IsSingleUseNormalLd)
16552 return SDValue();
16553 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16554
16555 // Can't split volatile or atomic loads.
16556 if (!LD->isSimple())
16557 return SDValue();
16558 SDValue BasePtr = LD->getBasePtr();
16559 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16560 LD->getPointerInfo(), LD->getAlign());
16561 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16562 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16563 DAG.getIntPtrConstant(4, dl));
16565 LD->getMemOperand(), 4, 4);
16566 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16567 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16568 SDValue Res;
16569 if (Subtarget.isLittleEndian())
16570 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16571 else
16572 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16573 SDValue TF =
16574 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16575 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16576 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16577 return Res;
16578 }
16579 case PPCISD::VCMP:
16580 // If a VCMP_rec node already exists with exactly the same operands as this
16581 // node, use its result instead of this node (VCMP_rec computes both a CR6
16582 // and a normal output).
16583 //
16584 if (!N->getOperand(0).hasOneUse() &&
16585 !N->getOperand(1).hasOneUse() &&
16586 !N->getOperand(2).hasOneUse()) {
16587
16588 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16589 SDNode *VCMPrecNode = nullptr;
16590
16591 SDNode *LHSN = N->getOperand(0).getNode();
16592 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16593 UI != E; ++UI)
16594 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16595 UI->getOperand(1) == N->getOperand(1) &&
16596 UI->getOperand(2) == N->getOperand(2) &&
16597 UI->getOperand(0) == N->getOperand(0)) {
16598 VCMPrecNode = *UI;
16599 break;
16600 }
16601
16602 // If there is no VCMP_rec node, or if the flag value has a single use,
16603 // don't transform this.
16604 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16605 break;
16606
16607 // Look at the (necessarily single) use of the flag value. If it has a
16608 // chain, this transformation is more complex. Note that multiple things
16609 // could use the value result, which we should ignore.
16610 SDNode *FlagUser = nullptr;
16611 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16612 FlagUser == nullptr; ++UI) {
16613 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16614 SDNode *User = *UI;
16615 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16616 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16617 FlagUser = User;
16618 break;
16619 }
16620 }
16621 }
16622
16623 // If the user is a MFOCRF instruction, we know this is safe.
16624 // Otherwise we give up for right now.
16625 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16626 return SDValue(VCMPrecNode, 0);
16627 }
16628 break;
16629 case ISD::BR_CC: {
16630 // If this is a branch on an altivec predicate comparison, lower this so
16631 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16632 // lowering is done pre-legalize, because the legalizer lowers the predicate
16633 // compare down to code that is difficult to reassemble.
16634 // This code also handles branches that depend on the result of a store
16635 // conditional.
16636 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16637 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16638
16639 int CompareOpc;
16640 bool isDot;
16641
16642 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16643 break;
16644
16645 // Since we are doing this pre-legalize, the RHS can be a constant of
16646 // arbitrary bitwidth which may cause issues when trying to get the value
16647 // from the underlying APInt.
16648 auto RHSAPInt = RHS->getAsAPIntVal();
16649 if (!RHSAPInt.isIntN(64))
16650 break;
16651
16652 unsigned Val = RHSAPInt.getZExtValue();
16653 auto isImpossibleCompare = [&]() {
16654 // If this is a comparison against something other than 0/1, then we know
16655 // that the condition is never/always true.
16656 if (Val != 0 && Val != 1) {
16657 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16658 return N->getOperand(0);
16659 // Always !=, turn it into an unconditional branch.
16660 return DAG.getNode(ISD::BR, dl, MVT::Other,
16661 N->getOperand(0), N->getOperand(4));
16662 }
16663 return SDValue();
16664 };
16665 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16666 unsigned StoreWidth = 0;
16667 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16668 isStoreConditional(LHS, StoreWidth)) {
16669 if (SDValue Impossible = isImpossibleCompare())
16670 return Impossible;
16671 PPC::Predicate CompOpc;
16672 // eq 0 => ne
16673 // ne 0 => eq
16674 // eq 1 => eq
16675 // ne 1 => ne
16676 if (Val == 0)
16677 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16678 else
16679 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16680
16681 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16682 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16683 auto *MemNode = cast<MemSDNode>(LHS);
16684 SDValue ConstSt = DAG.getMemIntrinsicNode(
16686 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16687 MemNode->getMemoryVT(), MemNode->getMemOperand());
16688
16689 SDValue InChain;
16690 // Unchain the branch from the original store conditional.
16691 if (N->getOperand(0) == LHS.getValue(1))
16692 InChain = LHS.getOperand(0);
16693 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16694 SmallVector<SDValue, 4> InChains;
16695 SDValue InTF = N->getOperand(0);
16696 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16697 if (InTF.getOperand(i) != LHS.getValue(1))
16698 InChains.push_back(InTF.getOperand(i));
16699 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16700 }
16701
16702 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16703 DAG.getConstant(CompOpc, dl, MVT::i32),
16704 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16705 ConstSt.getValue(2));
16706 }
16707
16708 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16709 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16710 assert(isDot && "Can't compare against a vector result!");
16711
16712 if (SDValue Impossible = isImpossibleCompare())
16713 return Impossible;
16714
16715 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16716 // Create the PPCISD altivec 'dot' comparison node.
16717 SDValue Ops[] = {
16718 LHS.getOperand(2), // LHS of compare
16719 LHS.getOperand(3), // RHS of compare
16720 DAG.getConstant(CompareOpc, dl, MVT::i32)
16721 };
16722 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16723 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16724
16725 // Unpack the result based on how the target uses it.
16726 PPC::Predicate CompOpc;
16727 switch (LHS.getConstantOperandVal(1)) {
16728 default: // Can't happen, don't crash on invalid number though.
16729 case 0: // Branch on the value of the EQ bit of CR6.
16730 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16731 break;
16732 case 1: // Branch on the inverted value of the EQ bit of CR6.
16733 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16734 break;
16735 case 2: // Branch on the value of the LT bit of CR6.
16736 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16737 break;
16738 case 3: // Branch on the inverted value of the LT bit of CR6.
16739 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16740 break;
16741 }
16742
16743 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16744 DAG.getConstant(CompOpc, dl, MVT::i32),
16745 DAG.getRegister(PPC::CR6, MVT::i32),
16746 N->getOperand(4), CompNode.getValue(1));
16747 }
16748 break;
16749 }
16750 case ISD::BUILD_VECTOR:
16751 return DAGCombineBuildVector(N, DCI);
16752 }
16753
16754 return SDValue();
16755}
16756
16757SDValue
// Fold (sdiv X, pow2) / (sdiv X, -pow2) into a shift-right-algebraic plus
// add-zero-extended (PPCISD::SRA_ADDZE), negating the result for a negated
// power-of-two divisor.
// NOTE(review): the signature line (16758) is missing from this extraction;
// this is the PPCTargetLowering override of TargetLowering::BuildSDIVPow2,
// where N is the sdiv node and Divisor the constant divisor — confirm
// against the full file.
16759                                        SelectionDAG &DAG,
16760                                        SmallVectorImpl<SDNode *> &Created) const {
16761  // fold (sdiv X, pow2)
16762  EVT VT = N->getValueType(0);
  // i64 division requires 64-bit GPRs; give up on 32-bit subtargets.
16763  if (VT == MVT::i64 && !Subtarget.isPPC64())
16764    return SDValue();
  // Only scalar i32/i64 divided by (+/-) an exact power of two is handled.
16765  if ((VT != MVT::i32 && VT != MVT::i64) ||
16766      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16767    return SDValue();
16768
16769  SDLoc DL(N);
16770  SDValue N0 = N->getOperand(0);
16771
16772  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
  // Shift amount is log2(|Divisor|); countr_zero on the (absolute) power of
  // two gives exactly that.
16773  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16774  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16775
  // SRA_ADDZE models srawi/sradi followed by addze: the carry produced by the
  // arithmetic shift (presumably set when a negative value loses nonzero bits)
  // is added back, yielding round-toward-zero signed division.
16776  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16777  Created.push_back(Op.getNode());
16778
  // For a negated power of two, negate the quotient: X / -2^k == -(X / 2^k).
16779  if (IsNegPow2) {
16780    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16781    Created.push_back(Op.getNode());
16782  }
16783
  // All newly created nodes were recorded in Created for the caller.
16784  return Op;
16785}
16786
16787//===----------------------------------------------------------------------===//
16788// Inline Assembly Support
16789//===----------------------------------------------------------------------===//
16790
// Report target-specific known-zero bits for PPC-specific nodes so generic
// DAG combines can exploit them.
// NOTE(review): the signature line (16791) is missing from this extraction;
// this is PPCTargetLowering::computeKnownBitsForTargetNode(Op, Known, ...) —
// confirm against the full file.
16792                                           KnownBits &Known,
16793                                           const APInt &DemandedElts,
16794                                           const SelectionDAG &DAG,
16795                                           unsigned Depth) const {
16796  Known.resetAll();
16797  switch (Op.getOpcode()) {
16798  default: break;
16799  case PPCISD::LBRX: {
16800    // lhbrx is known to have the top bits cleared out.
16801    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16802      Known.Zero = 0xFFFF0000;
16803    break;
16804  }
  // NOTE(review): the case label on line 16805 is missing from this
  // extraction — presumably ISD::INTRINSIC_WO_CHAIN (operand 0 is the
  // intrinsic ID, as queried below); confirm against the full file.
16806    switch (Op.getConstantOperandVal(0)) {
16807    default: break;
16808    case Intrinsic::ppc_altivec_vcmpbfp_p:
16809    case Intrinsic::ppc_altivec_vcmpeqfp_p:
16810    case Intrinsic::ppc_altivec_vcmpequb_p:
16811    case Intrinsic::ppc_altivec_vcmpequh_p:
16812    case Intrinsic::ppc_altivec_vcmpequw_p:
16813    case Intrinsic::ppc_altivec_vcmpequd_p:
16814    case Intrinsic::ppc_altivec_vcmpequq_p:
16815    case Intrinsic::ppc_altivec_vcmpgefp_p:
16816    case Intrinsic::ppc_altivec_vcmpgtfp_p:
16817    case Intrinsic::ppc_altivec_vcmpgtsb_p:
16818    case Intrinsic::ppc_altivec_vcmpgtsh_p:
16819    case Intrinsic::ppc_altivec_vcmpgtsw_p:
16820    case Intrinsic::ppc_altivec_vcmpgtsd_p:
16821    case Intrinsic::ppc_altivec_vcmpgtsq_p:
16822    case Intrinsic::ppc_altivec_vcmpgtub_p:
16823    case Intrinsic::ppc_altivec_vcmpgtuh_p:
16824    case Intrinsic::ppc_altivec_vcmpgtuw_p:
16825    case Intrinsic::ppc_altivec_vcmpgtud_p:
16826    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      // The AltiVec predicate-form compares produce a 0/1 result.
16827      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
16828      break;
16829    }
16830    break;
16831  }
  // NOTE(review): the case label on line 16832 is missing from this
  // extraction — presumably ISD::INTRINSIC_W_CHAIN (operand 1 is the
  // intrinsic ID, as queried below); confirm against the full file.
16833    switch (Op.getConstantOperandVal(1)) {
16834    default:
16835      break;
16836    case Intrinsic::ppc_load2r:
16837      // Top bits are cleared for load2r (which is the same as lhbrx).
16838      Known.Zero = 0xFFFF0000;
16839      break;
16840    }
16841    break;
16842  }
16843  }
16844}
16845
// Choose the preferred alignment for loop headers on this CPU.
// NOTE(review): the signature line (16846) is missing from this extraction;
// this is PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) — confirm
// against the full file.
16847  switch (Subtarget.getCPUDirective()) {
16848  default: break;
  // On the out-of-order server cores below, 32-byte alignment can be
  // profitable for selected loops (see the two checks further down).
16849  case PPC::DIR_970:
16850  case PPC::DIR_PWR4:
16851  case PPC::DIR_PWR5:
16852  case PPC::DIR_PWR5X:
16853  case PPC::DIR_PWR6:
16854  case PPC::DIR_PWR6X:
16855  case PPC::DIR_PWR7:
16856  case PPC::DIR_PWR8:
16857  case PPC::DIR_PWR9:
16858  case PPC::DIR_PWR10:
16859  case PPC::DIR_PWR11:
16860  case PPC::DIR_PWR_FUTURE: {
16861    if (!ML)
16862      break;
16863
    // NOTE(review): line 16864 is missing from this extraction — presumably a
    // guard (e.g. an option/directive check) around the inner-loop alignment
    // below; confirm against the full file.
16865    // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
16866    // so that we can decrease cache misses and branch-prediction misses.
16867    // Actual alignment of the loop will depend on the hotness check and other
16868    // logic in alignBlocks.
16869    if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16870      return Align(32);
16871    }
16872
16873    const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16874
16875    // For small loops (between 5 and 8 instructions), align to a 32-byte
16876    // boundary so that the entire loop fits in one instruction-cache line.
16877    uint64_t LoopSize = 0;
    // Sum instruction sizes; stop early once we exceed the 32-byte budget.
16878    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16879      for (const MachineInstr &J : **I) {
16880        LoopSize += TII->getInstSizeInBytes(J);
16881        if (LoopSize > 32)
16882          break;
16883      }
16884
16885    if (LoopSize > 16 && LoopSize <= 32)
16886      return Align(32);
16887
16888    break;
16889  }
16890  }
16891
  // NOTE(review): line 16892 is missing from this extraction — presumably the
  // fallback return delegating to the base TargetLowering implementation.
16893}
16894
16895/// getConstraintType - Given a constraint, return the type of
16896/// constraint it is for this target.
// NOTE(review): the signature lines (16897-16898) are missing from this
// extraction; this is PPCTargetLowering::getConstraintType(StringRef
// Constraint) — confirm against the full file.
16899  if (Constraint.size() == 1) {
16900    switch (Constraint[0]) {
16901    default: break;
    // Single-letter register-class constraints: base GPR, GPR, FPR (f/d),
    // AltiVec vector, and CR register classes respectively.
16902    case 'b':
16903    case 'r':
16904    case 'f':
16905    case 'd':
16906    case 'v':
16907    case 'y':
16908      return C_RegisterClass;
16909    case 'Z':
16910      // FIXME: While Z does indicate a memory constraint, it specifically
16911      // indicates an r+r address (used in conjunction with the 'y' modifier
16912      // in the replacement string). Currently, we're forcing the base
16913      // register to be r0 in the asm printer (which is interpreted as zero)
16914      // and forming the complete address in the second register. This is
16915      // suboptimal.
16916      return C_Memory;
16917    }
16918  } else if (Constraint == "wc") { // individual CR bits.
16919    return C_RegisterClass;
16920  } else if (Constraint == "wa" || Constraint == "wd" ||
16921             Constraint == "wf" || Constraint == "ws" ||
16922             Constraint == "wi" || Constraint == "ww") {
16923    return C_RegisterClass; // VSX registers.
16924  }
  // Anything else is handled by the generic implementation.
16925  return TargetLowering::getConstraintType(Constraint);
16926}
16927
16928/// Examine constraint type and operand type and determine a weight value.
16929/// This object must already have been set up with the operand type
16930/// and the current alternative constraint selected.
// NOTE(review): the signature lines (16931-16932) are missing from this
// extraction; this is PPCTargetLowering::getSingleConstraintMatchWeight —
// confirm against the full file.
16933    AsmOperandInfo &info, const char *constraint) const {
  // NOTE(review): line 16934 is missing from this extraction — presumably the
  // declaration of the 'weight' accumulator returned at the end.
16935  Value *CallOperandVal = info.CallOperandVal;
16936  // If we don't have a value, we can't do a match,
16937  // but allow it at the lowest weight.
16938  if (!CallOperandVal)
16939    return CW_Default;
16940  Type *type = CallOperandVal->getType();
16941
16942  // Look at the constraint type.
  // Multi-letter VSX/CR-bit constraints are matched first, each gated on the
  // operand type it can actually hold.
16943  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16944    return CW_Register; // an individual CR bit.
16945  else if ((StringRef(constraint) == "wa" ||
16946            StringRef(constraint) == "wd" ||
16947            StringRef(constraint) == "wf") &&
16948           type->isVectorTy())
16949    return CW_Register;
16950  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16951    return CW_Register; // just hold 64-bit integers data.
16952  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16953    return CW_Register;
16954  else if (StringRef(constraint) == "ww" && type->isFloatTy())
16955    return CW_Register;
16956
  // Single-letter constraints: weight as a register match only when the
  // operand type suits the register class.
16957  switch (*constraint) {
16958  default:
    // NOTE(review): line 16959 is missing from this extraction — presumably
    // the delegation to TargetLowering::getSingleConstraintMatchWeight.
16960    break;
16961  case 'b':
16962    if (type->isIntegerTy())
16963      weight = CW_Register;
16964    break;
16965  case 'f':
16966    if (type->isFloatTy())
16967      weight = CW_Register;
16968    break;
16969  case 'd':
16970    if (type->isDoubleTy())
16971      weight = CW_Register;
16972    break;
16973  case 'v':
16974    if (type->isVectorTy())
16975      weight = CW_Register;
16976    break;
16977  case 'y':
16978    weight = CW_Register;
16979    break;
16980  case 'Z':
16981    weight = CW_Memory;
16982    break;
16983  }
16984  return weight;
16985}
16986
16987std::pair<unsigned, const TargetRegisterClass *>
// Map an inline-asm constraint string (and the requested value type) to a
// (physical register, register class) pair.
// NOTE(review): the signature line (16988) is missing from this extraction;
// this is PPCTargetLowering::getRegForInlineAsmConstraint(TRI, Constraint,
// VT) — confirm against the full file.
16989                                                StringRef Constraint,
16990                                                MVT VT) const {
16991  if (Constraint.size() == 1) {
16992    // GCC RS6000 Constraint Letters
16993    switch (Constraint[0]) {
16994    case 'b':   // R1-R31
      // Base-register constraint: exclude r0/x0, which means "zero" in
      // address computations.
16995      if (VT == MVT::i64 && Subtarget.isPPC64())
16996        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16997      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16998    case 'r':   // R0-R31
16999      if (VT == MVT::i64 && Subtarget.isPPC64())
17000        return std::make_pair(0U, &PPC::G8RCRegClass);
17001      return std::make_pair(0U, &PPC::GPRCRegClass);
17002    // 'd' and 'f' constraints are both defined to be "the floating point
17003    // registers", where one is for 32-bit and the other for 64-bit. We don't
17004    // really care overly much here so just give them all the same reg classes.
17005    case 'd':
17006    case 'f':
      // SPE subtargets keep FP values in GPRs (f32) or SPE pairs (f64).
17007      if (Subtarget.hasSPE()) {
17008        if (VT == MVT::f32 || VT == MVT::i32)
17009          return std::make_pair(0U, &PPC::GPRCRegClass);
17010        if (VT == MVT::f64 || VT == MVT::i64)
17011          return std::make_pair(0U, &PPC::SPERCRegClass);
17012      } else {
17013        if (VT == MVT::f32 || VT == MVT::i32)
17014          return std::make_pair(0U, &PPC::F4RCRegClass);
17015        if (VT == MVT::f64 || VT == MVT::i64)
17016          return std::make_pair(0U, &PPC::F8RCRegClass);
17017      }
17018      break;
17019    case 'v':
17020      if (Subtarget.hasAltivec() && VT.isVector())
17021        return std::make_pair(0U, &PPC::VRRCRegClass);
17022      else if (Subtarget.hasVSX())
17023        // Scalars in Altivec registers only make sense with VSX.
17024        return std::make_pair(0U, &PPC::VFRCRegClass);
17025      break;
17026    case 'y':   // crrc
17027      return std::make_pair(0U, &PPC::CRRCRegClass);
17028    }
17029  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17030    // An individual CR bit.
17031    return std::make_pair(0U, &PPC::CRBITRCRegClass);
17032  } else if ((Constraint == "wa" || Constraint == "wd" ||
17033             Constraint == "wf" || Constraint == "wi") &&
17034             Subtarget.hasVSX()) {
17035    // A VSX register for either a scalar (FP) or vector. There is no
17036    // support for single precision scalars on subtargets prior to Power8.
17037    if (VT.isVector())
17038      return std::make_pair(0U, &PPC::VSRCRegClass);
17039    if (VT == MVT::f32 && Subtarget.hasP8Vector())
17040      return std::make_pair(0U, &PPC::VSSRCRegClass);
17041    return std::make_pair(0U, &PPC::VSFRCRegClass);
17042  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17043    if (VT == MVT::f32 && Subtarget.hasP8Vector())
17044      return std::make_pair(0U, &PPC::VSSRCRegClass);
17045    else
17046      return std::make_pair(0U, &PPC::VSFRCRegClass);
17047  } else if (Constraint == "lr") {
    // The link register; 64-bit vs 32-bit variant chosen by requested VT.
17048    if (VT == MVT::i64)
17049      return std::make_pair(0U, &PPC::LR8RCRegClass);
17050    else
17051      return std::make_pair(0U, &PPC::LRRCRegClass);
17052  }
17053
17054  // Handle special cases of physical registers that are not properly handled
17055  // by the base class.
17056  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17057    // If we name a VSX register, we can't defer to the base class because it
17058    // will not recognize the correct register (their names will be VSL{0-31}
17059    // and V{0-31} so they won't match). So we match them here.
17060    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      // "{vsNN}": vs0-vs31 alias VSL0-VSL31; vs32-vs63 alias V0-V31.
17061      int VSNum = atoi(Constraint.data() + 3);
17062      assert(VSNum >= 0 && VSNum <= 63 &&
17063             "Attempted to access a vsr out of range");
17064      if (VSNum < 32)
17065        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17066      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17067    }
17068
17069    // For float registers, we can't defer to the base class as it will match
17070    // the SPILLTOVSRRC class.
17071    if (Constraint.size() > 3 && Constraint[1] == 'f') {
17072      int RegNum = atoi(Constraint.data() + 2);
17073      if (RegNum > 31 || RegNum < 0)
17074        report_fatal_error("Invalid floating point register number");
17075      if (VT == MVT::f32 || VT == MVT::i32)
17076        return Subtarget.hasSPE()
17077                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17078                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17079      if (VT == MVT::f64 || VT == MVT::i64)
17080        return Subtarget.hasSPE()
17081                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17082                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17083    }
17084  }
17085
17086  std::pair<unsigned, const TargetRegisterClass *> R =
  // NOTE(review): line 17087 is missing from this extraction — presumably the
  // delegation to TargetLowering::getRegForInlineAsmConstraint(TRI,
  // Constraint, VT).
17088
17089  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17090  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17091  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17092  // register.
17093  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17094  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17095  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17096      PPC::GPRCRegClass.contains(R.first))
17097    return std::make_pair(TRI->getMatchingSuperReg(R.first,
17098                            PPC::sub_32, &PPC::G8RCRegClass),
17099                          &PPC::G8RCRegClass);
17100
17101  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17102  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17103    R.first = PPC::CR0;
17104    R.second = &PPC::CRRCRegClass;
17105  }
17106  // FIXME: This warning should ideally be emitted in the front end.
17107  const auto &TM = getTargetMachine();
17108  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17109    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17110         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17111        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17112      errs() << "warning: vector registers 20 to 32 are reserved in the "
17113                "default AIX AltiVec ABI and cannot be used\n";
17114  }
17115
17116  return R;
17117}
17118
17119/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17120/// vector. If it is invalid, don't add anything to Ops.
// NOTE(review): the signature line (17121) is missing from this extraction;
// this is PPCTargetLowering::LowerAsmOperandForConstraint(Op, Constraint,
// Ops, DAG) — confirm against the full file.
17122                                                     StringRef Constraint,
17123                                                     std::vector<SDValue> &Ops,
17124                                                     SelectionDAG &DAG) const {
17125  SDValue Result;
17126
17127  // Only support length 1 constraints.
17128  if (Constraint.size() > 1)
17129    return;
17130
17131  char Letter = Constraint[0];
17132  switch (Letter) {
17133  default: break;
  // 'I'..'P' are the PPC immediate-range constraints; each validates the
  // constant against its documented range before materializing it.
17134  case 'I':
17135  case 'J':
17136  case 'K':
17137  case 'L':
17138  case 'M':
17139  case 'N':
17140  case 'O':
17141  case 'P': {
    // NOTE(review): line 17142 is missing from this extraction — presumably
    // the dyn_cast of Op to ConstantSDNode producing CST.
17143    if (!CST) return; // Must be an immediate to match.
17144    SDLoc dl(Op);
17145    int64_t Value = CST->getSExtValue();
17146    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17147                         // numbers are printed as such.
17148    switch (Letter) {
17149    default: llvm_unreachable("Unknown constraint letter!");
17150    case 'I':  // "I" is a signed 16-bit constant.
17151      if (isInt<16>(Value))
17152        Result = DAG.getTargetConstant(Value, dl, TCVT);
17153      break;
17154    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      // NOTE(review): the condition on line 17155 is missing from this
      // extraction — presumably an isShiftedUInt<16, 16>(Value) check.
17156        Result = DAG.getTargetConstant(Value, dl, TCVT);
17157      break;
17158    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      // NOTE(review): the condition on line 17159 is missing from this
      // extraction — presumably an isShiftedInt<16, 16>(Value) check.
17160        Result = DAG.getTargetConstant(Value, dl, TCVT);
17161      break;
17162    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
17163      if (isUInt<16>(Value))
17164        Result = DAG.getTargetConstant(Value, dl, TCVT);
17165      break;
17166    case 'M':  // "M" is a constant that is greater than 31.
17167      if (Value > 31)
17168        Result = DAG.getTargetConstant(Value, dl, TCVT);
17169      break;
17170    case 'N':  // "N" is a positive constant that is an exact power of two.
17171      if (Value > 0 && isPowerOf2_64(Value))
17172        Result = DAG.getTargetConstant(Value, dl, TCVT);
17173      break;
17174    case 'O':  // "O" is the constant zero.
17175      if (Value == 0)
17176        Result = DAG.getTargetConstant(Value, dl, TCVT);
17177      break;
17178    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
17179      if (isInt<16>(-Value))
17180        Result = DAG.getTargetConstant(Value, dl, TCVT);
17181      break;
17182    }
17183    break;
17184  }
17185  }
17186
17187  if (Result.getNode()) {
17188    Ops.push_back(Result);
17189    return;
17190  }
17191
17192  // Handle standard constraint letters.
17193  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17194}
17195
// For the PPC trap intrinsics (tdw/tw/trapd/trap), forward any !annotation
// metadata attached to the IR call as an extra SDNode operand so it survives
// into the DAG.
// NOTE(review): the signature lines (17196-17197) are missing from this
// extraction; this is PPCTargetLowering::CollectTargetIntrinsicOperands(
// const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) —
// confirm against the full file.
17198                                              SelectionDAG &DAG) const {
  // Need at least the chain plus the intrinsic-ID operand.
17199  if (I.getNumOperands() <= 1)
17200    return;
17201  if (!isa<ConstantSDNode>(Ops[1].getNode()))
17202    return;
  // Ops[1] holds the intrinsic ID; only the four trap intrinsics are handled.
17203  auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17204  if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17205      IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17206    return;
17207
  // Append the annotation metadata node, if the call carries one.
17208  if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17209    Ops.push_back(DAG.getMDNode(MDN));
17210}
17211
17212// isLegalAddressingMode - Return true if the addressing mode represented
17213// by AM is legal for this target, for a load/store of the specified type.
// NOTE(review): the signature line (17214) is missing from this extraction;
// this is PPCTargetLowering::isLegalAddressingMode(DL, AM, Ty, AS, I) —
// confirm against the full file.
17215                                               const AddrMode &AM, Type *Ty,
17216                                               unsigned AS,
17217                                               Instruction *I) const {
17218  // Vector type r+i form is supported since power9 as DQ form. We don't check
17219  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
17220  // imm form is preferred and the offset can be adjusted to use imm form later
17221  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
17222  // max offset to check legal addressing mode, we should be a little aggressive
17223  // to contain other offsets for that LSRUse.
17224  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17225    return false;
17226
17227  // PPC allows a sign-extended 16-bit immediate field.
17228  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17229    return false;
17230
17231  // No global is ever allowed as a base.
17232  if (AM.BaseGV)
17233    return false;
17234
17235  // PPC only support r+r,
  // Scale encodes the index-register multiplier requested by the caller.
17236  switch (AM.Scale) {
17237  case 0: // "r+i" or just "i", depending on HasBaseReg.
17238    break;
17239  case 1:
17240    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17241      return false;
17242    // Otherwise we have r+r or r+i.
17243    break;
17244  case 2:
17245    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17246      return false;
17247    // Allow 2*r as r+r.
17248    break;
17249  default:
17250    // No other scales are supported.
17251    return false;
17252  }
17253
17254  return true;
17255}
17256
17257 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17258 SelectionDAG &DAG) const {
// Lower @llvm.returnaddress(Depth): Depth 0 reads the current frame's saved
// LR slot; Depth > 0 walks to the caller's frame and loads the return
// address at the ABI-defined LR save offset.
17260 MachineFrameInfo &MFI = MF.getFrameInfo();
17261 MFI.setReturnAddressIsTaken(true);
17262
17264 return SDValue();
17265
17266 SDLoc dl(Op);
17267 unsigned Depth = Op.getConstantOperandVal(0);
17268
17269 // Make sure the function does not optimize away the store of the RA to
17270 // the stack.
17271 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17272 FuncInfo->setLRStoreRequired();
17273 bool isPPC64 = Subtarget.isPPC64();
17274 auto PtrVT = getPointerTy(MF.getDataLayout());
17275
17276 if (Depth > 0) {
17277 // The link register (return address) is saved in the caller's frame
17278 // not the callee's stack frame. So we must get the caller's frame
17279 // address and load the return address at the LR offset from there.
17280 SDValue FrameAddr =
17281 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17282 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17283 SDValue Offset =
17284 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17285 isPPC64 ? MVT::i64 : MVT::i32);
17286 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17287 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17289 }
17290
17291 // Just load the return address off the stack.
17292 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17293 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17295 }
17296
17297 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17298 SelectionDAG &DAG) const {
// Lower @llvm.frameaddress(Depth): copy out the frame register, then load
// through it once per requested depth to follow the saved back chain.
17299 SDLoc dl(Op);
17300 unsigned Depth = Op.getConstantOperandVal(0);
17301
17303 MachineFrameInfo &MFI = MF.getFrameInfo();
17304 MFI.setFrameAddressIsTaken(true);
17305
17306 EVT PtrVT = getPointerTy(MF.getDataLayout());
17307 bool isPPC64 = PtrVT == MVT::i64;
17308
17309 // Naked functions never have a frame pointer, and so we use r1. For all
17310 // other functions, this decision must be delayed until during PEI.
17311 unsigned FrameReg;
17312 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17313 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17314 else
17315 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17316
17317 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17318 PtrVT);
// Each load dereferences the frame pointer at offset 0, which on PPC is
// presumably the saved back-chain pointer to the parent frame.
17319 while (Depth--)
17320 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17321 FrameAddr, MachinePointerInfo());
17322 return FrameAddr;
17323 }
17324
17325 // FIXME? Maybe this could be a TableGen attribute on some registers and
17326 // this table could be generated automatically from RegInfo.
17328 const MachineFunction &MF) const {
// Resolve a register name used by @llvm.read_register / @llvm.write_register.
// Only "r1", "r2" (32-bit targets only) and "r13" are exposed; any other
// name, or a request at the wrong width, is a fatal error.
17329 bool isPPC64 = Subtarget.isPPC64();
17330
// The request must match the register's native width: 64-bit scalars only
// on PPC64, 32-bit scalars everywhere else.
17331 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17332 if (!is64Bit && VT != LLT::scalar(32))
17333 report_fatal_error("Invalid register global variable type");
17334
17336 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17337 .Case("r2", isPPC64 ? Register() : PPC::R2)
17338 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17339 .Default(Register());
17340
17341 if (Reg)
17342 return Reg;
17343 report_fatal_error("Invalid register name global variable");
17344 }
17345
// Returns true if the given address must be materialized indirectly through
// the GOT/TOC rather than directly referencing the symbol.
17347 // 32-bit SVR4 ABI access everything as got-indirect.
17348 if (Subtarget.is32BitELFABI())
17349 return true;
17350
17351 // AIX accesses everything indirectly through the TOC, which is similar to
17352 // the GOT.
17353 if (Subtarget.isAIXABI())
17354 return true;
17355
17357 // If it is small or large code model, module locals are accessed
17358 // indirectly by loading their address from .toc/.got.
17359 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17360 return true;
17361
17362 // JumpTable and BlockAddress are accessed as got-indirect.
17364 return true;
17365
// For global addresses, defer to the subtarget's per-symbol decision.
17367 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17368
17369 return false;
17370 }
17371
17372 bool
// Always false: constant offsets are kept as explicit adds instead of being
// folded into the global address node.
17374 // The PowerPC target isn't yet aware of offsets.
17375 return false;
17376 }
17377
// Describe the memory access performed by PPC memory intrinsics (opcode,
// memory type, pointer operand, offset, size, alignment, flags) so the
// optimizer can reason about them like ordinary loads and stores.
17379 const CallInst &I,
17380 MachineFunction &MF,
17381 unsigned Intrinsic) const {
17382 switch (Intrinsic) {
// 128-bit lock-free atomic ops: modelled as 16-byte aligned i128 accesses.
17383 case Intrinsic::ppc_atomicrmw_xchg_i128:
17384 case Intrinsic::ppc_atomicrmw_add_i128:
17385 case Intrinsic::ppc_atomicrmw_sub_i128:
17386 case Intrinsic::ppc_atomicrmw_nand_i128:
17387 case Intrinsic::ppc_atomicrmw_and_i128:
17388 case Intrinsic::ppc_atomicrmw_or_i128:
17389 case Intrinsic::ppc_atomicrmw_xor_i128:
17390 case Intrinsic::ppc_cmpxchg_i128:
17391 Info.opc = ISD::INTRINSIC_W_CHAIN;
17392 Info.memVT = MVT::i128;
17393 Info.ptrVal = I.getArgOperand(0);
17394 Info.offset = 0;
17395 Info.align = Align(16);
17398 return true;
17399 case Intrinsic::ppc_atomic_load_i128:
17400 Info.opc = ISD::INTRINSIC_W_CHAIN;
17401 Info.memVT = MVT::i128;
17402 Info.ptrVal = I.getArgOperand(0);
17403 Info.offset = 0;
17404 Info.align = Align(16);
17406 return true;
17407 case Intrinsic::ppc_atomic_store_i128:
17408 Info.opc = ISD::INTRINSIC_VOID;
17409 Info.memVT = MVT::i128;
// For the store form, the pointer is the third argument (value halves first).
17410 Info.ptrVal = I.getArgOperand(2);
17411 Info.offset = 0;
17412 Info.align = Align(16);
17414 return true;
// AltiVec/VSX vector loads.
17415 case Intrinsic::ppc_altivec_lvx:
17416 case Intrinsic::ppc_altivec_lvxl:
17417 case Intrinsic::ppc_altivec_lvebx:
17418 case Intrinsic::ppc_altivec_lvehx:
17419 case Intrinsic::ppc_altivec_lvewx:
17420 case Intrinsic::ppc_vsx_lxvd2x:
17421 case Intrinsic::ppc_vsx_lxvw4x:
17422 case Intrinsic::ppc_vsx_lxvd2x_be:
17423 case Intrinsic::ppc_vsx_lxvw4x_be:
17424 case Intrinsic::ppc_vsx_lxvl:
17425 case Intrinsic::ppc_vsx_lxvll: {
17426 EVT VT;
17427 switch (Intrinsic) {
17428 case Intrinsic::ppc_altivec_lvebx:
17429 VT = MVT::i8;
17430 break;
17431 case Intrinsic::ppc_altivec_lvehx:
17432 VT = MVT::i16;
17433 break;
17434 case Intrinsic::ppc_altivec_lvewx:
17435 VT = MVT::i32;
17436 break;
17437 case Intrinsic::ppc_vsx_lxvd2x:
17438 case Intrinsic::ppc_vsx_lxvd2x_be:
17439 VT = MVT::v2f64;
17440 break;
17441 default:
17442 VT = MVT::v4i32;
17443 break;
17444 }
17445
17446 Info.opc = ISD::INTRINSIC_W_CHAIN;
17447 Info.memVT = VT;
17448 Info.ptrVal = I.getArgOperand(0);
// These instructions effectively mask the low address bits, so report a
// conservative window: the access may start up to size-1 bytes before the
// pointer and covers 2*size-1 bytes with no alignment guarantee.
17449 Info.offset = -VT.getStoreSize()+1;
17450 Info.size = 2*VT.getStoreSize()-1;
17451 Info.align = Align(1);
17452 Info.flags = MachineMemOperand::MOLoad;
17453 return true;
17454 }
// AltiVec/VSX vector stores; mirror image of the load cases above.
17455 case Intrinsic::ppc_altivec_stvx:
17456 case Intrinsic::ppc_altivec_stvxl:
17457 case Intrinsic::ppc_altivec_stvebx:
17458 case Intrinsic::ppc_altivec_stvehx:
17459 case Intrinsic::ppc_altivec_stvewx:
17460 case Intrinsic::ppc_vsx_stxvd2x:
17461 case Intrinsic::ppc_vsx_stxvw4x:
17462 case Intrinsic::ppc_vsx_stxvd2x_be:
17463 case Intrinsic::ppc_vsx_stxvw4x_be:
17464 case Intrinsic::ppc_vsx_stxvl:
17465 case Intrinsic::ppc_vsx_stxvll: {
17466 EVT VT;
17467 switch (Intrinsic) {
17468 case Intrinsic::ppc_altivec_stvebx:
17469 VT = MVT::i8;
17470 break;
17471 case Intrinsic::ppc_altivec_stvehx:
17472 VT = MVT::i16;
17473 break;
17474 case Intrinsic::ppc_altivec_stvewx:
17475 VT = MVT::i32;
17476 break;
17477 case Intrinsic::ppc_vsx_stxvd2x:
17478 case Intrinsic::ppc_vsx_stxvd2x_be:
17479 VT = MVT::v2f64;
17480 break;
17481 default:
17482 VT = MVT::v4i32;
17483 break;
17484 }
17485
17486 Info.opc = ISD::INTRINSIC_VOID;
17487 Info.memVT = VT;
// For stores the pointer is the second argument (value comes first).
17488 Info.ptrVal = I.getArgOperand(1);
17489 Info.offset = -VT.getStoreSize()+1;
17490 Info.size = 2*VT.getStoreSize()-1;
17491 Info.align = Align(1);
17492 Info.flags = MachineMemOperand::MOStore;
17493 return true;
17494 }
// Store-conditional family: width (and natural alignment) depends on the
// specific instruction.
17495 case Intrinsic::ppc_stdcx:
17496 case Intrinsic::ppc_stwcx:
17497 case Intrinsic::ppc_sthcx:
17498 case Intrinsic::ppc_stbcx: {
17499 EVT VT;
17500 auto Alignment = Align(8);
17501 switch (Intrinsic) {
17502 case Intrinsic::ppc_stdcx:
17503 VT = MVT::i64;
17504 break;
17505 case Intrinsic::ppc_stwcx:
17506 VT = MVT::i32;
17507 Alignment = Align(4);
17508 break;
17509 case Intrinsic::ppc_sthcx:
17510 VT = MVT::i16;
17511 Alignment = Align(2);
17512 break;
17513 case Intrinsic::ppc_stbcx:
17514 VT = MVT::i8;
17515 Alignment = Align(1);
17516 break;
17517 }
17518 Info.opc = ISD::INTRINSIC_W_CHAIN;
17519 Info.memVT = VT;
17520 Info.ptrVal = I.getArgOperand(0);
17521 Info.offset = 0;
17522 Info.align = Alignment;
17524 return true;
17525 }
17526 default:
17527 break;
17528 }
17529
17530 return false;
17531 }
17532
17533 /// It returns EVT::Other if the type should be determined using generic
17534 /// target-independent logic.
// Pick the widest profitable type for memcpy/memset expansion: a vector
// type when Altivec/VSX applies, otherwise the native GPR width.
17536 const MemOp &Op, const AttributeList &FuncAttributes) const {
17537 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17538 // We should use Altivec/VSX loads and stores when available. For unaligned
17539 // addresses, unaligned VSX loads are only fast starting with the P8.
17540 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17541 if (Op.isMemset() && Subtarget.hasVSX()) {
17542 uint64_t TailSize = Op.size() % 16;
17543 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17544 // element if vector element type matches tail store. For tail size
17545 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17546 if (TailSize > 2 && TailSize <= 4) {
17547 return MVT::v8i16;
17548 }
17549 return MVT::v4i32;
17550 }
17551 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17552 return MVT::v4i32;
17553 }
17554 }
17555
// Scalar fallback: 8-byte chunks on PPC64, 4-byte otherwise.
17556 if (Subtarget.isPPC64()) {
17557 return MVT::i64;
17558 }
17559
17560 return MVT::i32;
17561 }
17562
17563 /// Returns true if it is beneficial to convert a load of a constant
17564 /// to just the constant itself.
17566 Type *Ty) const {
17567 assert(Ty->isIntegerTy());
17568
// Any integer constant up to 64 bits can be materialized in registers
// more cheaply than loading it from the constant pool.
17569 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17570 return !(BitSize == 0 || BitSize > 64);
17571 }
17572
// IR-type overload: truncation is free exactly for i64 -> i32 (the low word
// of a 64-bit GPR is directly usable).
17574 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17575 return false;
17576 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17577 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17578 return NumBits1 == 64 && NumBits2 == 32;
17579 }
17580
// EVT overload: mirrors the IR-type overload above -- only i64 -> i32
// truncation is free.
17582 if (!VT1.isInteger() || !VT2.isInteger())
17583 return false;
17584 unsigned NumBits1 = VT1.getSizeInBits();
17585 unsigned NumBits2 = VT2.getSizeInBits();
17586 return NumBits1 == 64 && NumBits2 == 32;
17587 }
17588
17590 // Generally speaking, zexts are not free, but they are free when they can be
17591 // folded with other operations.
// A zext of a narrow (or, on PPC64, i32) load is free: the load instruction
// itself zero-fills the destination register, provided the load is not
// already a sign-extending load.
17592 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17593 EVT MemVT = LD->getMemoryVT();
17594 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17595 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17596 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17597 LD->getExtensionType() == ISD::ZEXTLOAD))
17598 return true;
17599 }
17600
17601 // FIXME: Add other cases...
17602 // - 32-bit shifts with a zext to i64
17603 // - zext after ctlz, bswap, etc.
17604 // - zext after and by a constant mask
17605
17606 return TargetLowering::isZExtFree(Val, VT2);
17607 }
17608
17609bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17610 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17611 "invalid fpext types");
17612 // Extending to float128 is not free.
17613 if (DestVT == MVT::f128)
17614 return false;
17615 return true;
17616}
17617
// Compare immediates are legal when they fit a 16-bit signed (cmpi) or
// unsigned (cmpli) field.
17619 return isInt<16>(Imm) || isUInt<16>(Imm);
17620 }
17621
// Same constraint for add immediates (addi's 16-bit field).
17623 return isInt<16>(Imm) || isUInt<16>(Imm);
17624 }
17625
// Report whether an unaligned access of type VT is allowed, and (via Fast)
// whether it is fast. Simple scalar types are fine; vectors only with VSX
// and only for the four 16-byte types; ppcf128 never.
17628 unsigned *Fast) const {
17630 return false;
17631
17632 // PowerPC supports unaligned memory access for simple non-vector types.
17633 // Although accessing unaligned addresses is not as efficient as accessing
17634 // aligned addresses, it is generally more efficient than manual expansion,
17635 // and generally only traps for software emulation when crossing page
17636 // boundaries.
17637
17638 if (!VT.isSimple())
17639 return false;
17640
17641 if (VT.isFloatingPoint() && !VT.isVector() &&
17642 !Subtarget.allowsUnalignedFPAccess())
17643 return false;
17644
17645 if (VT.getSimpleVT().isVector()) {
17646 if (Subtarget.hasVSX()) {
17647 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17648 VT != MVT::v4f32 && VT != MVT::v4i32)
17649 return false;
17650 } else {
17651 return false;
17652 }
17653 }
17654
17655 if (VT == MVT::ppcf128)
17656 return false;
17657
17658 if (Fast)
17659 *Fast = 1;
17660
17661 return true;
17662 }
17663
// Decide whether a multiply by constant C should be decomposed into
// shift/add/sub. Returns true only for 64-bit-representable constants of
// the forms +/-(2^N +/- 1) that a single MULLI (or MULLI+RLDICR) cannot
// already handle.
17665 SDValue C) const {
17666 // Check integral scalar types.
17667 if (!VT.isScalarInteger())
17668 return false;
17669 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17670 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17671 return false;
17672 // This transformation will generate >= 2 operations. But the following
17673 // cases will generate <= 2 instructions during ISEL. So exclude them.
17674 // 1. If the constant multiplier fits 16 bits, it can be handled by one
17675 // HW instruction, ie. MULLI
17676 // 2. If the multiplier after shifted fits 16 bits, an extra shift
17677 // instruction is needed than case 1, ie. MULLI and RLDICR
17678 int64_t Imm = ConstNode->getSExtValue();
17679 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17680 Imm >>= Shift;
17681 if (isInt<16>(Imm))
17682 return false;
17683 uint64_t UImm = static_cast<uint64_t>(Imm);
// Accept UImm of the shape 2^N+1, 2^N-1, -(2^N-1) or -(2^N+1).
17684 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17685 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17686 return true;
17687 }
17688 return false;
17689 }
17690
17696
// FMA beats separate fmul+fadd for f32/f64 on hardware FP targets, and for
// f128 once P9 vector support exists; SPE/soft-float targets never benefit.
17698 Type *Ty) const {
17699 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17700 return false;
17701 switch (Ty->getScalarType()->getTypeID()) {
17702 case Type::FloatTyID:
17703 case Type::DoubleTyID:
17704 return true;
17705 case Type::FP128TyID:
17706 return Subtarget.hasP9Vector();
17707 default:
17708 return false;
17709 }
17710 }
17711
17712 // FIXME: add more patterns which are not profitable to hoist.
// Returns false (i.e. "do not hoist") only for two single-use patterns we
// want to keep adjacent to their user: an FMul feeding an FAdd/FSub (FMA
// formation) and a float load feeding a store (load/store type combining).
17714 if (!I->hasOneUse())
17715 return true;
17716
17718 assert(User && "A single use instruction with no uses.");
17719
17720 switch (I->getOpcode()) {
17721 case Instruction::FMul: {
17722 // Don't break FMA, PowerPC prefers FMA.
17723 if (User->getOpcode() != Instruction::FSub &&
17724 User->getOpcode() != Instruction::FAdd)
17725 return true;
17726
17728 const Function *F = I->getFunction();
17729 const DataLayout &DL = F->getDataLayout();
17730 Type *Ty = User->getOperand(0)->getType();
17731
// Keep the pair together only when FMA is both fast and fusible here.
17732 return !(
17735 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17736 }
17737 case Instruction::Load: {
17738 // Don't break "store (load float*)" pattern, this pattern will be combined
17739 // to "store (load int32)" in later InstCombine pass. See function
17740 // combineLoadToOperationType. On PowerPC, loading a float point takes more
17741 // cycles than loading a 32 bit integer.
17742 LoadInst *LI = cast<LoadInst>(I);
17743 // For the loads that combineLoadToOperationType does nothing, like
17744 // ordered load, it should be profitable to hoist them.
17745 // For swifterror load, it can only be used for pointer to pointer type, so
17746 // later type check should get rid of this case.
17747 if (!LI->isUnordered())
17748 return true;
17749
17750 if (User->getOpcode() != Instruction::Store)
17751 return true;
17752
17753 if (I->getType()->getTypeID() != Type::FloatTyID)
17754 return true;
17755
17756 return false;
17757 }
17758 default:
17759 return true;
17760 }
17761 return true;
17762 }
17763
17764 const MCPhysReg *
17766 // LR is a callee-save register, but we must treat it as clobbered by any call
17767 // site. Hence we include LR in the scratch registers, which are in turn added
17768 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17769 // to CTR, which is used by any indirect call.
// Null-terminated list, as expected by the stackmap/patchpoint machinery.
17770 static const MCPhysReg ScratchRegs[] = {
17771 PPC::X12, PPC::LR8, PPC::CTR8, 0
17772 };
17773
17774 return ScratchRegs;
17775 }
17776
// EH: the exception object pointer is passed in GPR3 (r3/x3).
17778 const Constant *PersonalityFn) const {
17779 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17780 }
17781
// EH: the exception selector value is passed in GPR4 (r4/x4).
17783 const Constant *PersonalityFn) const {
17784 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17785 }
17786
17787 bool
// Prefer shuffle-based BUILD_VECTOR expansion when the target can do it
// without going through the stack.
17789 EVT VT , unsigned DefinedValues) const {
17790 if (VT == MVT::v2i64)
17791 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17792
17793 if (Subtarget.hasVSX())
17794 return true;
17795
17797 }
17798
17805
17806 // Create a fast isel object.
// Factory hook: hand back the PPC FastISel implementation.
17808 const TargetLibraryInfo *LibInfo) const {
17810 return PPC::createFastISel(FuncInfo, LibInfo);
17811 }
17812
17813// 'Inverted' means the FMA opcode after negating one multiplicand.
17814// For example, (fma -a b c) = (fnmsub a b c)
17815static unsigned invertFMAOpcode(unsigned Opc) {
17816 switch (Opc) {
17817 default:
17818 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17819 case ISD::FMA:
17820 return PPCISD::FNMSUB;
17821 case PPCISD::FNMSUB:
17822 return ISD::FMA;
17823 }
17824}
17825
// PPC-specific negation folding for FNMSUB; everything else defers to the
// generic TargetLowering implementation.
17827 bool LegalOps, bool OptForSize,
17829 unsigned Depth) const {
17831 return SDValue();
17832
17833 unsigned Opc = Op.getOpcode();
17834 EVT VT = Op.getValueType();
17835 SDNodeFlags Flags = Op.getNode()->getFlags();
17836
17837 switch (Opc) {
17838 case PPCISD::FNMSUB:
// Only fold single-use nodes of a legal type; otherwise fall through to
// the generic handling below.
17839 if (!Op.hasOneUse() || !isTypeLegal(VT))
17840 break;
17841
17843 SDValue N0 = Op.getOperand(0);
17844 SDValue N1 = Op.getOperand(1);
17845 SDValue N2 = Op.getOperand(2);
17846 SDLoc Loc(Op);
17847
// Negating the addend is required by every folding below; give up early
// if that is not possible.
17849 SDValue NegN2 =
17850 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17851
17852 if (!NegN2)
17853 return SDValue();
17854
17855 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17856 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17857 // These transformations may change sign of zeroes. For example,
17858 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17859 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17860 // Try and choose the cheaper one to negate.
17862 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17863 N0Cost, Depth + 1);
17864
17866 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17867 N1Cost, Depth + 1);
17868
17869 if (NegN0 && N0Cost <= N1Cost) {
17870 Cost = std::min(N0Cost, N2Cost);
17871 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17872 } else if (NegN1) {
17873 Cost = std::min(N1Cost, N2Cost);
17874 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17875 }
17876 }
17877
17878 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17879 if (isOperationLegal(ISD::FMA, VT)) {
17880 Cost = N2Cost;
17881 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17882 }
17883
17884 break;
17885 }
17886
17887 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17888 Cost, Depth);
17889 }
17890
17891 // Override to enable LOAD_STACK_GUARD lowering on Linux.
// Non-Linux targets defer to the base-class decision.
17893 if (!Subtarget.isTargetLinux())
17895 return true;
17896 }
17897
17898 // Override to disable global variable loading on Linux and insert AIX canary
17899 // word declaration.
// On AIX, declare the module-level stack-protector canary word symbol.
17901 if (Subtarget.isAIXABI()) {
17902 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17903 PointerType::getUnqual(M.getContext()));
17904 return;
17905 }
// Non-Linux ELF targets fall back to the base-class declarations.
17906 if (!Subtarget.isTargetLinux())
17908 }
17909
// On AIX the stack guard is the module's canary-word global declared in
// insertSSPDeclarations; other targets use the base-class guard.
17911 if (Subtarget.isAIXABI())
17912 return M.getGlobalVariable(AIXSSPCanaryWordName);
17914 }
17915
// Returns true when the FP immediate can be materialized in registers
// (requires VSX): always for f32/f64 with P10 prefixed instructions, else
// only zero or small exact integers; ppcf128 only for +0.0.
17917 bool ForCodeSize) const {
17918 if (!VT.isSimple() || !Subtarget.hasVSX())
17919 return false;
17920
17921 switch(VT.getSimpleVT().SimpleTy) {
17922 default:
17923 // For FP types that are currently not supported by PPC backend, return
17924 // false. Examples: f16, f80.
17925 return false;
17926 case MVT::f32:
17927 case MVT::f64: {
17928 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17929 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17930 return true;
17931 }
17932 bool IsExact;
17933 APSInt IntResult(16, false);
17934 // The rounding mode doesn't really matter because we only care about floats
17935 // that can be converted to integers exactly.
17936 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17937 // For exact values in the range [-16, 15] we can materialize the float.
17938 if (IsExact && IntResult <= 15 && IntResult >= -16)
17939 return true;
17940 return Imm.isZero();
17941 }
17942 case MVT::ppcf128:
17943 return Imm.isPosZero();
17944 }
17945 }
17946
17947 // For vector shift operation op, fold
17948 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
// The PPC vector shift instructions use only the low log2(numbits) bits of
// the shift amount, so an explicit "and" mask of numbits-1 is redundant.
17950 SelectionDAG &DAG) {
17951 SDValue N0 = N->getOperand(0);
17952 SDValue N1 = N->getOperand(1);
17953 EVT VT = N0.getValueType();
17954 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17955 unsigned Opcode = N->getOpcode();
17956 unsigned TargetOpcode;
17957
17958 switch (Opcode) {
17959 default:
17960 llvm_unreachable("Unexpected shift operation");
17961 case ISD::SHL:
17962 TargetOpcode = PPCISD::SHL;
17963 break;
17964 case ISD::SRL:
17965 TargetOpcode = PPCISD::SRL;
17966 break;
17967 case ISD::SRA:
17968 TargetOpcode = PPCISD::SRA;
17969 break;
17970 }
17971
// Only fire for legal vector shifts where the mask is exactly numbits-1
// (as a scalar constant or a constant splat).
17972 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17973 N1->getOpcode() == ISD::AND)
17974 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17975 if (Mask->getZExtValue() == OpSizeInBits - 1)
17976 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17977
17978 return SDValue();
17979 }
17980
17981SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17982 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17983 return Value;
17984
17985 SDValue N0 = N->getOperand(0);
17986 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17987 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17988 N0.getOpcode() != ISD::SIGN_EXTEND ||
17989 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17990 N->getValueType(0) != MVT::i64)
17991 return SDValue();
17992
17993 // We can't save an operation here if the value is already extended, and
17994 // the existing shift is easier to combine.
17995 SDValue ExtsSrc = N0.getOperand(0);
17996 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17997 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17998 return SDValue();
17999
18000 SDLoc DL(N0);
18001 SDValue ShiftBy = SDValue(CN1, 0);
18002 // We want the shift amount to be i32 on the extswli, but the shift could
18003 // have an i64.
18004 if (ShiftBy.getValueType() == MVT::i64)
18005 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18006
18007 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18008 ShiftBy);
18009}
18010
18011SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18012 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18013 return Value;
18014
18015 return SDValue();
18016}
18017
18018SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18019 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18020 return Value;
18021
18022 return SDValue();
18023}
18024
18025 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18026 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18027 // When C is zero, the equation (addi Z, -C) can be simplified to Z
18028 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
18030 const PPCSubtarget &Subtarget) {
18031 if (!Subtarget.isPPC64())
18032 return SDValue();
18033
18034 SDValue LHS = N->getOperand(0);
18035 SDValue RHS = N->getOperand(1);
18036
// Matches zext(setcc Z, C) where Z is i64 and -C fits addi's 16-bit field.
18037 auto isZextOfCompareWithConstant = [](SDValue Op) {
18038 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18039 Op.getValueType() != MVT::i64)
18040 return false;
18041
18042 SDValue Cmp = Op.getOperand(0);
18043 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18044 Cmp.getOperand(0).getValueType() != MVT::i64)
18045 return false;
18046
18047 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18048 int64_t NegConstant = 0 - Constant->getSExtValue();
18049 // Due to the limitations of the addi instruction,
18050 // -C is required to be [-32768, 32767].
18051 return isInt<16>(NegConstant);
18052 }
18053
18054 return false;
18055 };
18056
18057 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18058 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18059
18060 // If there is a pattern, canonicalize a zext operand to the RHS.
18061 if (LHSHasPattern && !RHSHasPattern)
18062 std::swap(LHS, RHS);
18063 else if (!LHSHasPattern && !RHSHasPattern)
18064 return SDValue();
18065
// Build the carry-based sequence; the Glue result threads the carry bit
// from the ADDC/SUBC into the final ADDE.
18066 SDLoc DL(N);
18067 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
18068 SDValue Cmp = RHS.getOperand(0);
18069 SDValue Z = Cmp.getOperand(0);
18070 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18071 int64_t NegConstant = 0 - Constant->getSExtValue();
18072
18073 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18074 default: break;
18075 case ISD::SETNE: {
18076 // when C == 0
18077 // --> addze X, (addic Z, -1).carry
18078 // /
18079 // add X, (zext(setne Z, C))--
18080 // \ when -32768 <= -C <= 32767 && C != 0
18081 // --> addze X, (addic (addi Z, -C), -1).carry
18082 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18083 DAG.getConstant(NegConstant, DL, MVT::i64));
18084 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18085 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18086 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
18087 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18088 SDValue(Addc.getNode(), 1));
18089 }
18090 case ISD::SETEQ: {
18091 // when C == 0
18092 // --> addze X, (subfic Z, 0).carry
18093 // /
18094 // add X, (zext(sete Z, C))--
18095 // \ when -32768 <= -C <= 32767 && C != 0
18096 // --> addze X, (subfic (addi Z, -C), 0).carry
18097 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18098 DAG.getConstant(NegConstant, DL, MVT::i64));
18099 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18100 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18101 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
18102 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18103 SDValue(Subc.getNode(), 1));
18104 }
18105 }
18106
18107 return SDValue();
18108 }
18109
18110 // Transform
18111 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18112 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18113 // In this case both C1 and C2 must be known constants.
18114 // C1+C2 must fit into a 34 bit signed integer.
18116 const PPCSubtarget &Subtarget) {
18117 if (!Subtarget.isUsingPCRelativeCalls())
18118 return SDValue();
18119
18120 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18121 // If we find that node try to cast the Global Address and the Constant.
18122 SDValue LHS = N->getOperand(0);
18123 SDValue RHS = N->getOperand(1);
18124
// Canonicalize the MAT_PCREL_ADDR operand to the LHS.
18125 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18126 std::swap(LHS, RHS);
18127
18128 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18129 return SDValue();
18130
18131 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18132 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18133 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18134
18135 // Check that both casts succeeded.
18136 if (!GSDN || !ConstNode)
18137 return SDValue();
18138
18139 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18140 SDLoc DL(GSDN);
18141
18142 // The signed int offset needs to fit in 34 bits.
18143 if (!isInt<34>(NewOffset))
18144 return SDValue();
18145
18146 // The new global address is a copy of the old global address except
18147 // that it has the updated Offset.
18148 SDValue GA =
18149 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18150 NewOffset, GSDN->getTargetFlags());
18151 SDValue MatPCRel =
18152 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18153 return MatPCRel;
18154 }
18155
18156SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18157 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18158 return Value;
18159
18160 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18161 return Value;
18162
18163 return SDValue();
18164}
18165
18166 // Detect TRUNCATE operations on bitcasts of float128 values.
18167 // What we are looking for here is the situtation where we extract a subset
18168 // of bits from a 128 bit float.
18169 // This can be of two forms:
18170 // 1) BITCAST of f128 feeding TRUNCATE
18171 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18172 // The reason this is required is because we do not have a legal i128 type
18173 // and so we want to prevent having to store the f128 and then reload part
18174 // of it.
18175 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18176 DAGCombinerInfo &DCI) const {
18177 // If we are using CRBits then try that first.
18178 if (Subtarget.useCRBits()) {
18179 // Check if CRBits did anything and return that if it did.
18180 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18181 return CRTruncValue;
18182 }
18183
18184 SDLoc dl(N);
18185 SDValue Op0 = N->getOperand(0);
18186
18187 // Looking for a truncate of i128 to i64.
18188 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18189 return SDValue();
18190
// Endianness decides which v2i64 lane holds the low 64 bits.
18191 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18192
18193 // SRL feeding TRUNCATE.
18194 if (Op0.getOpcode() == ISD::SRL) {
18196 // The right shift has to be by 64 bits.
18197 if (!ConstNode || ConstNode->getZExtValue() != 64)
18198 return SDValue();
18199
18200 // Switch the element number to extract.
18201 EltToExtract = EltToExtract ? 0 : 1;
18202 // Update Op0 past the SRL.
18203 Op0 = Op0.getOperand(0);
18204 }
18205
18206 // BITCAST feeding a TRUNCATE possibly via SRL.
18207 if (Op0.getOpcode() == ISD::BITCAST &&
18208 Op0.getValueType() == MVT::i128 &&
18209 Op0.getOperand(0).getValueType() == MVT::f128) {
// Reinterpret the f128 as v2i64 and pull out the selected 64-bit lane,
// avoiding a store/reload through an illegal i128.
18210 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18211 return DCI.DAG.getNode(
18212 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18213 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18214 }
18215 return SDValue();
18216 }
18217
18218SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18219 SelectionDAG &DAG = DCI.DAG;
18220
18221 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18222 if (!ConstOpOrElement)
18223 return SDValue();
18224
18225 // An imul is usually smaller than the alternative sequence for legal type.
18227 isOperationLegal(ISD::MUL, N->getValueType(0)))
18228 return SDValue();
18229
18230 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18231 switch (this->Subtarget.getCPUDirective()) {
18232 default:
18233 // TODO: enhance the condition for subtarget before pwr8
18234 return false;
18235 case PPC::DIR_PWR8:
18236 // type mul add shl
18237 // scalar 4 1 1
18238 // vector 7 2 2
18239 return true;
18240 case PPC::DIR_PWR9:
18241 case PPC::DIR_PWR10:
18242 case PPC::DIR_PWR11:
18244 // type mul add shl
18245 // scalar 5 2 2
18246 // vector 7 2 2
18247
18248 // The cycle RATIO of related operations are showed as a table above.
18249 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
18250 // scalar and vector type. For 2 instrs patterns, add/sub + shl
18251 // are 4, it is always profitable; but for 3 instrs patterns
18252 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
18253 // So we should only do it for vector type.
18254 return IsAddOne && IsNeg ? VT.isVector() : true;
18255 }
18256 };
18257
18258 EVT VT = N->getValueType(0);
18259 SDLoc DL(N);
18260
18261 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18262 bool IsNeg = MulAmt.isNegative();
18263 APInt MulAmtAbs = MulAmt.abs();
18264
18265 if ((MulAmtAbs - 1).isPowerOf2()) {
18266 // (mul x, 2^N + 1) => (add (shl x, N), x)
18267 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18268
18269 if (!IsProfitable(IsNeg, true, VT))
18270 return SDValue();
18271
18272 SDValue Op0 = N->getOperand(0);
18273 SDValue Op1 =
18274 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18275 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18276 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18277
18278 if (!IsNeg)
18279 return Res;
18280
18281 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18282 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18283 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18284 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18285
18286 if (!IsProfitable(IsNeg, false, VT))
18287 return SDValue();
18288
18289 SDValue Op0 = N->getOperand(0);
18290 SDValue Op1 =
18291 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18292 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18293
18294 if (!IsNeg)
18295 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18296 else
18297 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18298
18299 } else {
18300 return SDValue();
18301 }
18302}
18303
18304// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18305// in combiner since we need to check SD flags and other subtarget features.
18306SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18307 DAGCombinerInfo &DCI) const {
18308 SDValue N0 = N->getOperand(0);
18309 SDValue N1 = N->getOperand(1);
18310 SDValue N2 = N->getOperand(2);
18311 SDNodeFlags Flags = N->getFlags();
18312 EVT VT = N->getValueType(0);
18313 SelectionDAG &DAG = DCI.DAG;
18315 unsigned Opc = N->getOpcode();
18316 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18317 bool LegalOps = !DCI.isBeforeLegalizeOps();
18318 SDLoc Loc(N);
18319
18320 if (!isOperationLegal(ISD::FMA, VT))
18321 return SDValue();
18322
18323 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18324 // since (fnmsub a b c)=-0 while c-ab=+0.
18325 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18326 return SDValue();
18327
18328 // (fma (fneg a) b c) => (fnmsub a b c)
18329 // (fnmsub (fneg a) b c) => (fma a b c)
18330 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18331 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18332
18333 // (fma a (fneg b) c) => (fnmsub a b c)
18334 // (fnmsub a (fneg b) c) => (fma a b c)
18335 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18336 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18337
18338 return SDValue();
18339}
18340
18341bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18342 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18343 if (!Subtarget.is64BitELFABI())
18344 return false;
18345
18346 // If not a tail call then no need to proceed.
18347 if (!CI->isTailCall())
18348 return false;
18349
18350 // If sibling calls have been disabled and tail-calls aren't guaranteed
18351 // there is no reason to duplicate.
18352 auto &TM = getTargetMachine();
18353 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18354 return false;
18355
18356 // Can't tail call a function called indirectly, or if it has variadic args.
18357 const Function *Callee = CI->getCalledFunction();
18358 if (!Callee || Callee->isVarArg())
18359 return false;
18360
18361 // Make sure the callee and caller calling conventions are eligible for tco.
18362 const Function *Caller = CI->getParent()->getParent();
18363 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18364 CI->getCallingConv()))
18365 return false;
18366
18367 // If the function is local then we have a good chance at tail-calling it
18368 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18369}
18370
18371bool PPCTargetLowering::
18372isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18373 const Value *Mask = AndI.getOperand(1);
18374 // If the mask is suitable for andi. or andis. we should sink the and.
18375 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18376 // Can't handle constants wider than 64-bits.
18377 if (CI->getBitWidth() > 64)
18378 return false;
18379 int64_t ConstVal = CI->getZExtValue();
18380 return isUInt<16>(ConstVal) ||
18381 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18382 }
18383
18384 // For non-constant masks, we can always use the record-form and.
18385 return true;
18386}
18387
18388/// getAddrModeForFlags - Based on the set of address flags, select the most
18389/// optimal instruction format to match by.
18390PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18391 // This is not a node we should be handling here.
18392 if (Flags == PPC::MOF_None)
18393 return PPC::AM_None;
18394 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18395 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18396 if ((Flags & FlagSet) == FlagSet)
18397 return PPC::AM_DForm;
18398 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18399 if ((Flags & FlagSet) == FlagSet)
18400 return PPC::AM_DSForm;
18401 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18402 if ((Flags & FlagSet) == FlagSet)
18403 return PPC::AM_DQForm;
18404 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18405 if ((Flags & FlagSet) == FlagSet)
18406 return PPC::AM_PrefixDForm;
18407 // If no other forms are selected, return an X-Form as it is the most
18408 // general addressing mode.
18409 return PPC::AM_XForm;
18410}
18411
18412/// Set alignment flags based on whether or not the Frame Index is aligned.
18413/// Utilized when computing flags for address computation when selecting
18414/// load and store instructions.
18415static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18416 SelectionDAG &DAG) {
18417 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18418 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18419 if (!FI)
18420 return;
18422 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18423 // If this is (add $FI, $S16Imm), the alignment flags are already set
18424 // based on the immediate. We just need to clear the alignment flags
18425 // if the FI alignment is weaker.
18426 if ((FrameIndexAlign % 4) != 0)
18427 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18428 if ((FrameIndexAlign % 16) != 0)
18429 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18430 // If the address is a plain FrameIndex, set alignment flags based on
18431 // FI alignment.
18432 if (!IsAdd) {
18433 if ((FrameIndexAlign % 4) == 0)
18434 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18435 if ((FrameIndexAlign % 16) == 0)
18436 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18437 }
18438}
18439
18440/// Given a node, compute flags that are used for address computation when
18441/// selecting load and store instructions. The flags computed are stored in
18442/// FlagSet. This function takes into account whether the node is a constant,
18443/// an ADD, OR, or a constant, and computes the address flags accordingly.
18444static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18445 SelectionDAG &DAG) {
18446 // Set the alignment flags for the node depending on if the node is
18447 // 4-byte or 16-byte aligned.
18448 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18449 if ((Imm & 0x3) == 0)
18450 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18451 if ((Imm & 0xf) == 0)
18452 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18453 };
18454
18456 // All 32-bit constants can be computed as LIS + Disp.
18457 const APInt &ConstImm = CN->getAPIntValue();
18458 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18459 FlagSet |= PPC::MOF_AddrIsSImm32;
18460 SetAlignFlagsForImm(ConstImm.getZExtValue());
18461 setAlignFlagsForFI(N, FlagSet, DAG);
18462 }
18463 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18464 FlagSet |= PPC::MOF_RPlusSImm34;
18465 else // Let constant materialization handle large constants.
18466 FlagSet |= PPC::MOF_NotAddNorCst;
18467 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18468 // This address can be represented as an addition of:
18469 // - Register + Imm16 (possibly a multiple of 4/16)
18470 // - Register + Imm34
18471 // - Register + PPCISD::Lo
18472 // - Register + Register
18473 // In any case, we won't have to match this as Base + Zero.
18474 SDValue RHS = N.getOperand(1);
18476 const APInt &ConstImm = CN->getAPIntValue();
18477 if (ConstImm.isSignedIntN(16)) {
18478 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18479 SetAlignFlagsForImm(ConstImm.getZExtValue());
18480 setAlignFlagsForFI(N, FlagSet, DAG);
18481 }
18482 if (ConstImm.isSignedIntN(34))
18483 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18484 else
18485 FlagSet |= PPC::MOF_RPlusR; // Register.
18486 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18487 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18488 else
18489 FlagSet |= PPC::MOF_RPlusR;
18490 } else { // The address computation is not a constant or an addition.
18491 setAlignFlagsForFI(N, FlagSet, DAG);
18492 FlagSet |= PPC::MOF_NotAddNorCst;
18493 }
18494}
18495
18503
18504/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
18505/// the address flags of the load/store instruction that is to be matched.
18506unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18507 SelectionDAG &DAG) const {
18508 unsigned FlagSet = PPC::MOF_None;
18509
18510 // Compute subtarget flags.
18511 if (!Subtarget.hasP9Vector())
18512 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18513 else
18514 FlagSet |= PPC::MOF_SubtargetP9;
18515
18516 if (Subtarget.hasPrefixInstrs())
18517 FlagSet |= PPC::MOF_SubtargetP10;
18518
18519 if (Subtarget.hasSPE())
18520 FlagSet |= PPC::MOF_SubtargetSPE;
18521
18522 // Check if we have a PCRel node and return early.
18523 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18524 return FlagSet;
18525
18526 // If the node is the paired load/store intrinsics, compute flags for
18527 // address computation and return early.
18528 unsigned ParentOp = Parent->getOpcode();
18529 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18530 (ParentOp == ISD::INTRINSIC_VOID))) {
18531 unsigned ID = Parent->getConstantOperandVal(1);
18532 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18533 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18534 ? Parent->getOperand(2)
18535 : Parent->getOperand(3);
18536 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18537 FlagSet |= PPC::MOF_Vector;
18538 return FlagSet;
18539 }
18540 }
18541
18542 // Mark this as something we don't want to handle here if it is atomic
18543 // or pre-increment instruction.
18544 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18545 if (LSB->isIndexed())
18546 return PPC::MOF_None;
18547
18548 // Compute in-memory type flags. This is based on if there are scalars,
18549 // floats or vectors.
18550 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18551 assert(MN && "Parent should be a MemSDNode!");
18552 EVT MemVT = MN->getMemoryVT();
18553 unsigned Size = MemVT.getSizeInBits();
18554 if (MemVT.isScalarInteger()) {
18555 assert(Size <= 128 &&
18556 "Not expecting scalar integers larger than 16 bytes!");
18557 if (Size < 32)
18558 FlagSet |= PPC::MOF_SubWordInt;
18559 else if (Size == 32)
18560 FlagSet |= PPC::MOF_WordInt;
18561 else
18562 FlagSet |= PPC::MOF_DoubleWordInt;
18563 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18564 if (Size == 128)
18565 FlagSet |= PPC::MOF_Vector;
18566 else if (Size == 256) {
18567 assert(Subtarget.pairedVectorMemops() &&
18568 "256-bit vectors are only available when paired vector memops is "
18569 "enabled!");
18570 FlagSet |= PPC::MOF_Vector;
18571 } else
18572 llvm_unreachable("Not expecting illegal vectors!");
18573 } else { // Floating point type: can be scalar, f128 or vector types.
18574 if (Size == 32 || Size == 64)
18575 FlagSet |= PPC::MOF_ScalarFloat;
18576 else if (MemVT == MVT::f128 || MemVT.isVector())
18577 FlagSet |= PPC::MOF_Vector;
18578 else
18579 llvm_unreachable("Not expecting illegal scalar floats!");
18580 }
18581
18582 // Compute flags for address computation.
18583 computeFlagsForAddressComputation(N, FlagSet, DAG);
18584
18585 // Compute type extension flags.
18586 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18587 switch (LN->getExtensionType()) {
18588 case ISD::SEXTLOAD:
18589 FlagSet |= PPC::MOF_SExt;
18590 break;
18591 case ISD::EXTLOAD:
18592 case ISD::ZEXTLOAD:
18593 FlagSet |= PPC::MOF_ZExt;
18594 break;
18595 case ISD::NON_EXTLOAD:
18596 FlagSet |= PPC::MOF_NoExt;
18597 break;
18598 }
18599 } else
18600 FlagSet |= PPC::MOF_NoExt;
18601
18602 // For integers, no extension is the same as zero extension.
18603 // We set the extension mode to zero extension so we don't have
18604 // to add separate entries in AddrModesMap for loads and stores.
18605 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18606 FlagSet |= PPC::MOF_ZExt;
18607 FlagSet &= ~PPC::MOF_NoExt;
18608 }
18609
18610 // If we don't have prefixed instructions, 34-bit constants should be
18611 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18612 bool IsNonP1034BitConst =
18614 FlagSet) == PPC::MOF_RPlusSImm34;
18615 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18616 IsNonP1034BitConst)
18617 FlagSet |= PPC::MOF_NotAddNorCst;
18618
18619 return FlagSet;
18620}
18621
18622/// SelectForceXFormMode - Given the specified address, force it to be
18623/// represented as an indexed [r+r] operation (an XForm instruction).
18625 SDValue &Base,
18626 SelectionDAG &DAG) const {
18627
18629 int16_t ForceXFormImm = 0;
18630 if (provablyDisjointOr(DAG, N) &&
18631 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18632 Disp = N.getOperand(0);
18633 Base = N.getOperand(1);
18634 return Mode;
18635 }
18636
18637 // If the address is the result of an add, we will utilize the fact that the
18638 // address calculation includes an implicit add. However, we can reduce
18639 // register pressure if we do not materialize a constant just for use as the
18640 // index register. We only get rid of the add if it is not an add of a
18641 // value and a 16-bit signed constant and both have a single use.
18642 if (N.getOpcode() == ISD::ADD &&
18643 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18644 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18645 Disp = N.getOperand(0);
18646 Base = N.getOperand(1);
18647 return Mode;
18648 }
18649
18650 // Otherwise, use R0 as the base register.
18651 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18652 N.getValueType());
18653 Base = N;
18654
18655 return Mode;
18656}
18657
18659 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18660 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18661 EVT ValVT = Val.getValueType();
18662 // If we are splitting a scalar integer into f64 parts (i.e. so they
18663 // can be placed into VFRC registers), we need to zero extend and
18664 // bitcast the values. This will ensure the value is placed into a
18665 // VSR using direct moves or stack operations as needed.
18666 if (PartVT == MVT::f64 &&
18667 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18668 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18669 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18670 Parts[0] = Val;
18671 return true;
18672 }
18673 return false;
18674}
18675
18676SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18677 SelectionDAG &DAG) const {
18678 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18680 EVT RetVT = Op.getValueType();
18681 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18682 SDValue Callee =
18683 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18684 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18687 for (const SDValue &N : Op->op_values()) {
18688 EVT ArgVT = N.getValueType();
18689 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18690 Entry.Node = N;
18691 Entry.Ty = ArgTy;
18692 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18693 Entry.IsZExt = !Entry.IsSExt;
18694 Args.push_back(Entry);
18695 }
18696
18697 SDValue InChain = DAG.getEntryNode();
18698 SDValue TCChain = InChain;
18699 const Function &F = DAG.getMachineFunction().getFunction();
18700 bool isTailCall =
18701 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18702 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18703 if (isTailCall)
18704 InChain = TCChain;
18705 CLI.setDebugLoc(SDLoc(Op))
18706 .setChain(InChain)
18707 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18708 .setTailCall(isTailCall)
18709 .setSExtResult(SignExtend)
18710 .setZExtResult(!SignExtend)
18712 return TLI.LowerCallTo(CLI).first;
18713}
18714
18715SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18716 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18717 SelectionDAG &DAG) const {
18718 if (Op.getValueType() == MVT::f32)
18719 return lowerToLibCall(LibCallFloatName, Op, DAG);
18720
18721 if (Op.getValueType() == MVT::f64)
18722 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18723
18724 return SDValue();
18725}
18726
18727bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18728 SDNodeFlags Flags = Op.getNode()->getFlags();
18729 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18730 Flags.hasNoNaNs() && Flags.hasNoInfs();
18731}
18732
18733bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18734 return Op.getNode()->getFlags().hasApproximateFuncs();
18735}
18736
18737bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18739}
18740
18741SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18742 const char *LibCallFloatName,
18743 const char *LibCallDoubleNameFinite,
18744 const char *LibCallFloatNameFinite,
18745 SDValue Op,
18746 SelectionDAG &DAG) const {
18747 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18748 return SDValue();
18749
18750 if (!isLowringToMASSFiniteSafe(Op))
18751 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18752 DAG);
18753
18754 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18755 LibCallDoubleNameFinite, Op, DAG);
18756}
18757
18758SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18759 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18760 "__xl_powf_finite", Op, DAG);
18761}
18762
18763SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18764 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18765 "__xl_sinf_finite", Op, DAG);
18766}
18767
18768SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18769 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18770 "__xl_cosf_finite", Op, DAG);
18771}
18772
18773SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18774 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18775 "__xl_logf_finite", Op, DAG);
18776}
18777
18778SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18779 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18780 "__xl_log10f_finite", Op, DAG);
18781}
18782
18783SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18784 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18785 "__xl_expf_finite", Op, DAG);
18786}
18787
18788// If we happen to match to an aligned D-Form, check if the Frame Index is
18789// adequately aligned. If it is not, reset the mode to match to X-Form.
18790static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18791 PPC::AddrMode &Mode) {
18793 return;
18794 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18795 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18796 Mode = PPC::AM_XForm;
18797}
18798
18799/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
18800/// compute the address flags of the node, get the optimal address mode based
18801/// on the flags, and set the Base and Disp based on the address mode.
18803 SDValue N, SDValue &Disp,
18804 SDValue &Base,
18805 SelectionDAG &DAG,
18806 MaybeAlign Align) const {
18807 SDLoc DL(Parent);
18808
18809 // Compute the address flags.
18810 unsigned Flags = computeMOFlags(Parent, N, DAG);
18811
18812 // Get the optimal address mode based on the Flags.
18813 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18814
18815 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18816 // Select an X-Form load if it is not.
18817 setXFormForUnalignedFI(N, Flags, Mode);
18818
18819 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18820 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18821 assert(Subtarget.isUsingPCRelativeCalls() &&
18822 "Must be using PC-Relative calls when a valid PC-Relative node is "
18823 "present!");
18824 Mode = PPC::AM_PCRel;
18825 }
18826
18827 // Set Base and Disp accordingly depending on the address mode.
18828 switch (Mode) {
18829 case PPC::AM_DForm:
18830 case PPC::AM_DSForm:
18831 case PPC::AM_DQForm: {
18832 // This is a register plus a 16-bit immediate. The base will be the
18833 // register and the displacement will be the immediate unless it
18834 // isn't sufficiently aligned.
18835 if (Flags & PPC::MOF_RPlusSImm16) {
18836 SDValue Op0 = N.getOperand(0);
18837 SDValue Op1 = N.getOperand(1);
18838 int16_t Imm = Op1->getAsZExtVal();
18839 if (!Align || isAligned(*Align, Imm)) {
18840 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18841 Base = Op0;
18843 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18844 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18845 }
18846 break;
18847 }
18848 }
18849 // This is a register plus the @lo relocation. The base is the register
18850 // and the displacement is the global address.
18851 else if (Flags & PPC::MOF_RPlusLo) {
18852 Disp = N.getOperand(1).getOperand(0); // The global address.
18857 Base = N.getOperand(0);
18858 break;
18859 }
18860 // This is a constant address at most 32 bits. The base will be
18861 // zero or load-immediate-shifted and the displacement will be
18862 // the low 16 bits of the address.
18863 else if (Flags & PPC::MOF_AddrIsSImm32) {
18864 auto *CN = cast<ConstantSDNode>(N);
18865 EVT CNType = CN->getValueType(0);
18866 uint64_t CNImm = CN->getZExtValue();
18867 // If this address fits entirely in a 16-bit sext immediate field, codegen
18868 // this as "d, 0".
18869 int16_t Imm;
18870 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18871 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18872 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18873 CNType);
18874 break;
18875 }
18876 // Handle 32-bit sext immediate with LIS + Addr mode.
18877 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18878 (!Align || isAligned(*Align, CNImm))) {
18879 int32_t Addr = (int32_t)CNImm;
18880 // Otherwise, break this down into LIS + Disp.
18881 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18882 Base =
18883 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18884 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18885 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18886 break;
18887 }
18888 }
18889 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
18890 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18892 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18893 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18894 } else
18895 Base = N;
18896 break;
18897 }
18898 case PPC::AM_PrefixDForm: {
18899 int64_t Imm34 = 0;
18900 unsigned Opcode = N.getOpcode();
18901 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18902 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18903 // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
18904 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18905 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18906 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18907 else
18908 Base = N.getOperand(0);
18909 } else if (isIntS34Immediate(N, Imm34)) {
18910 // The address is a 34-bit signed immediate.
18911 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18912 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18913 }
18914 break;
18915 }
18916 case PPC::AM_PCRel: {
18917 // When selecting PC-Relative instructions, "Base" is not utilized as
18918 // we select the address as [PC+imm].
18919 Disp = N;
18920 break;
18921 }
18922 case PPC::AM_None:
18923 break;
18924 default: { // By default, X-Form is always available to be selected.
18925 // When a frame index is not aligned, we also match by XForm.
18927 Base = FI ? N : N.getOperand(1);
18928 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18929 N.getValueType())
18930 : N.getOperand(0);
18931 break;
18932 }
18933 }
18934 return Mode;
18935}
18936
18938 bool Return,
18939 bool IsVarArg) const {
18940 switch (CC) {
18941 case CallingConv::Cold:
18942 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18943 default:
18944 return CC_PPC64_ELF;
18945 }
18946}
18947
18949 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18950}
18951
18954 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18955 if (shouldInlineQuadwordAtomics() && Size == 128)
18957
18958 switch (AI->getOperation()) {
18964 default:
18966 }
18967
18968 llvm_unreachable("unreachable atomicrmw operation");
18969}
18970
18978
18979static Intrinsic::ID
18981 switch (BinOp) {
18982 default:
18983 llvm_unreachable("Unexpected AtomicRMW BinOp");
18985 return Intrinsic::ppc_atomicrmw_xchg_i128;
18986 case AtomicRMWInst::Add:
18987 return Intrinsic::ppc_atomicrmw_add_i128;
18988 case AtomicRMWInst::Sub:
18989 return Intrinsic::ppc_atomicrmw_sub_i128;
18990 case AtomicRMWInst::And:
18991 return Intrinsic::ppc_atomicrmw_and_i128;
18992 case AtomicRMWInst::Or:
18993 return Intrinsic::ppc_atomicrmw_or_i128;
18994 case AtomicRMWInst::Xor:
18995 return Intrinsic::ppc_atomicrmw_xor_i128;
18997 return Intrinsic::ppc_atomicrmw_nand_i128;
18998 }
18999}
19000
19002 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19003 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19004 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19005 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19006 Type *ValTy = Incr->getType();
19007 assert(ValTy->getPrimitiveSizeInBits() == 128);
19010 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19011 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19012 Value *IncrHi =
19013 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19014 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
19015 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19016 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19017 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19018 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19019 return Builder.CreateOr(
19020 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19021}
19022
19024 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19025 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19026 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19027 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19028 Type *ValTy = CmpVal->getType();
19029 assert(ValTy->getPrimitiveSizeInBits() == 128);
19030 Function *IntCmpXchg =
19031 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19032 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19033 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19034 Value *CmpHi =
19035 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19036 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19037 Value *NewHi =
19038 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19039 emitLeadingFence(Builder, CI, Ord);
19040 Value *LoHi =
19041 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19042 emitTrailingFence(Builder, CI, Ord);
19043 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19044 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19045 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19046 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19047 return Builder.CreateOr(
19048 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19049}
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Align
uint64_t Addr
uint64_t Size
bool End
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
const MachineOperand & RHS
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:469
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:166
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:191
static bool is64Bit(const char *name)
Value * LHS
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5337
bool isDenormal() const
Definition APFloat.h:1361
APInt bitcastToAPInt() const
Definition APFloat.h:1266
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:227
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1400
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:442
APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:986
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1513
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1323
APInt abs() const
Get the absolute value.
Definition APInt.h:1766
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:322
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:428
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:464
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1693
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:433
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:299
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:289
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Definition ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
This is an SDNode representing atomic operations.
This class holds the attributes for a function, its return value, and its parameters.
Definition Attributes.h:468
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:61
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition Constants.h:893
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:271
This is the shared class of boolean and integer constants.
Definition Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:195
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition DataLayout.h:459
A debug info location.
Definition DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:156
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:211
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:704
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:384
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:773
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:785
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:701
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:353
const Function & getFunction() const
Definition Function.h:171
arg_iterator arg_begin()
Definition Function.h:865
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:380
size_t arg_size() const
Definition Function.h:898
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:221
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:742
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:589
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
StringRef getSection() const
Definition Globals.cpp:183
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:124
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:91
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:394
Metadata node.
Definition Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:65
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition Module.cpp:582
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
bool isAIXABI() const
bool useSoftFloat() const
const PPCFrameLowering * getFrameLowering() const override
bool needsSwapsForVSXMemOps() const
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
const PPCInstrInfo * getInstrInfo() const override
bool isSVR4ABI() const
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
POPCNTDKind hasPOPCNTD() const
bool isLittleEndian() const
bool isTargetLinux() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
bool isPredictableSelectIsExpensive() const
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:95
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
size_t size() const
Definition SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:149
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:143
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:149
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:235
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:166
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:297
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:242
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:224
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
static IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:252
A Use represents the edge between a Value definition and its users.
Definition Use.h:43
Value * getOperand(unsigned i) const
Definition User.h:228
unsigned getNumOperands() const
Definition User.h:250
LLVM Value Representation.
Definition Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
User * user_back()
Definition Value.h:407
const ParentTy * getParent() const
Definition ilist_node.h:32
self_iterator getIterator()
Definition ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:243
@ TargetConstantPool
Definition ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:743
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:246
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:205
@ GlobalAddress
Definition ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:716
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:262
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:236
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:418
@ GlobalTLSAddress
Definition ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:804
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:634
@ TargetExternalSymbol
Definition ISDOpcodes.h:175
@ TargetJumpTable
Definition ISDOpcodes.h:173
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:756
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:229
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:170
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:810
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:771
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:696
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:463
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:456
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:886
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:919
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:437
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:816
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:793
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:347
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Create or insert an LLVM Function declaration for an intrinsic, and return it.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:92
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combintaion of flags, if these bits are set they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combintaion of flags, if these bits are set they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combintaion of flags, if these bits are set they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ XMC_PR
Program Code.
Definition XCOFF.h:105
@ XTY_ER
External reference.
Definition XCOFF.h:241
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:480
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1723
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:169
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:296
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:682
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:215
unsigned M1(unsigned Val)
Definition VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1730
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:291
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:154
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:193
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:159
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
To bit_cast(const From &from) noexcept
Definition bit.h:89
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1922
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:375
ArrayRef(const T &OneElt) -> ArrayRef< T >
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:186
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:563
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:581
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:327
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:210
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:281
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:254
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:284
static constexpr roundingMode rmTowardZero
Definition APFloat.h:258
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:389
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:278
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:379
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:310
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:375
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:317
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:322
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:330
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:447
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:70
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:56
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)